Skip to content

Commit 7384495

Browse files
committed
Update MMR tests
1 parent f6fae19 commit 7384495

2 files changed

Lines changed: 55 additions & 16 deletions

File tree

_includes/code/howto/search.similarity.mmr.py

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,41 @@
1-
# START MMRNearText
21
import weaviate
2+
import random
3+
import time
4+
from weaviate.classes.config import Property, DataType, Configure
35
from weaviate.classes.query import Diversity
6+
from weaviate.collections.classes.data import DataObject
47

58
client = weaviate.connect_to_local()
69

7-
collection = client.collections.get("JeopardyQuestion")
10+
# Setup: create a collection whose vectors form two tight, opposite clusters plus randomly scattered vectors, to demonstrate MMR diversity
11+
client.collections.delete("MMRDemo")
12+
col = client.collections.create(
13+
name="MMRDemo",
14+
properties=[Property(name="question", data_type=DataType.TEXT)],
15+
vector_config=Configure.Vectors.self_provided(),
16+
)
17+
18+
random.seed(42)
19+
base_vec = [random.uniform(-1, 1) for _ in range(128)]
20+
for i in range(30):
21+
if i < 10:
22+
vec = [v + random.uniform(-0.05, 0.05) for v in base_vec]
23+
elif i < 20:
24+
vec = [-v + random.uniform(-0.05, 0.05) for v in base_vec]
25+
else:
26+
vec = [random.uniform(-1, 1) for _ in range(128)]
27+
col.data.insert(properties={"question": f"Question {i}"}, vector=vec)
28+
29+
time.sleep(2)
30+
31+
# START MMRNearText
32+
from weaviate.classes.query import Diversity
33+
34+
collection = client.collections.get("MMRDemo")
835

936
# Retrieve 20 candidates, then rerank to select 5 diverse results
10-
response = collection.query.near_text(
11-
query="animals in movies",
37+
response = collection.query.near_vector(
38+
near_vector=base_vec,
1239
limit=20,
1340
selection=Diversity.MMR(
1441
limit=5,
@@ -21,13 +48,13 @@
2148
# END MMRNearText
2249

2350
# Test
24-
assert response.objects[0].collection == "JeopardyQuestion"
51+
assert response.objects[0].collection == "MMRDemo"
2552
assert len(response.objects) == 5
2653
assert "question" in response.objects[0].properties.keys()
2754

2855
# Verify that MMR produces a different ordering than standard search
29-
standard_response = collection.query.near_text(
30-
query="animals in movies",
56+
standard_response = collection.query.near_vector(
57+
near_vector=base_vec,
3158
limit=5,
3259
)
3360
standard_questions = [o.properties["question"] for o in standard_response.objects]
@@ -39,7 +66,7 @@
3966
# START MMRNearVector
4067
from weaviate.classes.query import Diversity
4168

42-
collection = client.collections.get("JeopardyQuestion")
69+
collection = client.collections.get("MMRDemo")
4370

4471
# Get a vector to use as the query
4572
sample = collection.query.fetch_objects(limit=1, include_vector=True)
@@ -65,25 +92,25 @@
6592
# START MMRBalanceExamples
6693
from weaviate.classes.query import Diversity
6794

68-
collection = client.collections.get("JeopardyQuestion")
95+
collection = client.collections.get("MMRDemo")
6996

7097
# Pure diversity — maximize difference between results
71-
response_diverse = collection.query.near_text(
72-
query="animals in movies",
98+
response_diverse = collection.query.near_vector(
99+
near_vector=base_vec,
73100
limit=20,
74101
selection=Diversity.MMR(limit=5, balance=0.0),
75102
)
76103

77104
# Balanced — equal weight on relevance and diversity
78-
response_balanced = collection.query.near_text(
79-
query="animals in movies",
105+
response_balanced = collection.query.near_vector(
106+
near_vector=base_vec,
80107
limit=20,
81108
selection=Diversity.MMR(limit=5, balance=0.5),
82109
)
83110

84111
# Pure relevance — equivalent to standard vector search
85-
response_relevant = collection.query.near_text(
86-
query="animals in movies",
112+
response_relevant = collection.query.near_vector(
113+
near_vector=base_vec,
87114
limit=20,
88115
selection=Diversity.MMR(limit=5, balance=1.0),
89116
)
@@ -96,8 +123,9 @@
96123

97124
# Different balance values should produce different result orderings
98125
diverse_questions = [o.properties["question"] for o in response_diverse.objects]
99-
balanced_questions = [o.properties["question"] for o in response_balanced.objects]
100126
relevant_questions = [o.properties["question"] for o in response_relevant.objects]
101127
assert diverse_questions != relevant_questions, "Pure diversity and pure relevance should differ"
102128

129+
# Cleanup
130+
client.collections.delete("MMRDemo")
103131
client.close()

tests/test_python.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,17 @@ def test_search(empty_weaviates, script_loc):
193193
run_py_script(script_loc, custom_replace_pairs=utils.edu_readonly_replacements)
194194

195195

196+
@pytest.mark.pyv4
197+
@pytest.mark.parametrize(
198+
"script_loc",
199+
[
200+
"./_includes/code/howto/search.similarity.mmr.py",
201+
],
202+
)
203+
def test_search_mmr(empty_weaviates, script_loc):
204+
run_py_script(script_loc)
205+
206+
196207
# ========== Starter Guides ==========
197208

198209
@pytest.mark.pyv4

0 commit comments

Comments
 (0)