|
1 | | -# START MMRNearText |
2 | 1 | import weaviate |
| 2 | +import random |
| 3 | +import time |
| 4 | +from weaviate.classes.config import Property, DataType, Configure |
3 | 5 | from weaviate.classes.query import Diversity |
| 6 | +from weaviate.collections.classes.data import DataObject |
4 | 7 |
|
5 | 8 | client = weaviate.connect_to_local()
6 | 9 |

7 | | -collection = client.collections.get("JeopardyQuestion")
| 10 | +# Setup: create a fresh "MMRDemo" collection with clustered vectors to demonstrate MMR diversity
| 11 | +client.collections.delete("MMRDemo")  # remove any existing "MMRDemo" collection before re-creating it
| 12 | +col = client.collections.create(
| 13 | + name="MMRDemo",
| 14 | + properties=[Property(name="question", data_type=DataType.TEXT)],
| 15 | + vector_config=Configure.Vectors.self_provided(),  # vectors are passed explicitly on insert below
| 16 | +)
| 17 | +
| 18 | +random.seed(42)  # fixed seed so the generated vectors are the same on every run
| 19 | +base_vec = [random.uniform(-1, 1) for _ in range(128)]  # 128-dim reference vector, reused as the query vector
| 20 | +for i in range(30):
| 21 | + if i < 10:
| 22 | + vec = [v + random.uniform(-0.05, 0.05) for v in base_vec]  # objects 0-9: base_vec plus small noise
| 23 | + elif i < 20:
| 24 | + vec = [-v + random.uniform(-0.05, 0.05) for v in base_vec]  # objects 10-19: negated base_vec plus small noise
| 25 | + else:
| 26 | + vec = [random.uniform(-1, 1) for _ in range(128)]  # objects 20-29: independent random vectors
| 27 | + col.data.insert(properties={"question": f"Question {i}"}, vector=vec)
| 28 | +
| 29 | +time.sleep(2)  # NOTE(review): presumably gives the server time to make the inserts searchable — confirm
| 30 | + |
| 31 | +# START MMRNearText |
| 32 | +from weaviate.classes.query import Diversity |
| 33 | + |
| 34 | +collection = client.collections.get("MMRDemo") |
8 | 35 |
|
9 | 36 | # Retrieve 20 candidates, then rerank to select 5 diverse results |
10 | | -response = collection.query.near_text( |
11 | | - query="animals in movies", |
| 37 | +response = collection.query.near_vector( |
| 38 | + near_vector=base_vec, |
12 | 39 | limit=20, |
13 | 40 | selection=Diversity.MMR( |
14 | 41 | limit=5, |
|
21 | 48 | # END MMRNearText |
22 | 49 |
|
23 | 50 | # Test: the MMR response comes from the expected collection, holds 5 objects, and exposes "question"
24 | | -assert response.objects[0].collection == "JeopardyQuestion"
| 51 | +assert response.objects[0].collection == "MMRDemo"
25 | 52 | assert len(response.objects) == 5
26 | 53 | assert "question" in response.objects[0].properties.keys()
27 | 54 |
|
28 | 55 | # Verify MMR produces a different ordering than a standard search (no selection) for the same query
29 | | -standard_response = collection.query.near_text(
30 | | - query="animals in movies",
| 56 | +standard_response = collection.query.near_vector(
| 57 | + near_vector=base_vec,
31 | 58 | limit=5,  # same count as the MMR selection limit above, so the two result lists are comparable
32 | 59 | )
33 | 60 | standard_questions = [o.properties["question"] for o in standard_response.objects]
|
39 | 66 | # START MMRNearVector |
40 | 67 | from weaviate.classes.query import Diversity |
41 | 68 |
|
42 | | -collection = client.collections.get("JeopardyQuestion") |
| 69 | +collection = client.collections.get("MMRDemo") |
43 | 70 |
|
44 | 71 | # Get a vector to use as query |
45 | 72 | sample = collection.query.fetch_objects(limit=1, include_vector=True) |
|
65 | 92 | # START MMRBalanceExamples |
66 | 93 | from weaviate.classes.query import Diversity |
67 | 94 |
|
68 | | -collection = client.collections.get("JeopardyQuestion") |
| 95 | +collection = client.collections.get("MMRDemo") |
69 | 96 |
|
70 | 97 | # Pure diversity (balance=0.0) — maximize difference between results
71 | | -response_diverse = collection.query.near_text(
72 | | - query="animals in movies",
| 98 | +response_diverse = collection.query.near_vector(
| 99 | + near_vector=base_vec,
73 | 100 | limit=20,
74 | 101 | selection=Diversity.MMR(limit=5, balance=0.0),
75 | 102 | )
76 | 103 |

77 | 104 | # Balanced (balance=0.5) — equal weight on relevance and diversity
78 | | -response_balanced = collection.query.near_text(
79 | | - query="animals in movies",
| 105 | +response_balanced = collection.query.near_vector(
| 106 | + near_vector=base_vec,
80 | 107 | limit=20,
81 | 108 | selection=Diversity.MMR(limit=5, balance=0.5),
82 | 109 | )
83 | 110 |

84 | 111 | # Pure relevance (balance=1.0) — equivalent to standard vector search
85 | | -response_relevant = collection.query.near_text(
86 | | - query="animals in movies",
| 112 | +response_relevant = collection.query.near_vector(
| 113 | + near_vector=base_vec,
87 | 114 | limit=20,
88 | 115 | selection=Diversity.MMR(limit=5, balance=1.0),
89 | 116 | )
|
96 | 123 |
|
97 | 124 | # Pure diversity (balance=0.0) and pure relevance (balance=1.0) should produce different result lists
98 | 125 | diverse_questions = [o.properties["question"] for o in response_diverse.objects]
99 | | -balanced_questions = [o.properties["question"] for o in response_balanced.objects]
100 | 126 | relevant_questions = [o.properties["question"] for o in response_relevant.objects]
101 | 127 | assert diverse_questions != relevant_questions, "Pure diversity and pure relevance should differ"
102 | 128 |

| 129 | +# Cleanup: delete the "MMRDemo" collection, then close the client connection
| 130 | +client.collections.delete("MMRDemo")
103 | 131 | client.close()
0 commit comments