Skip to content

Commit 17597cc

Browse files
committed
Reuse TantivySearcher across queries via searcher pool
1 parent beedbf8 commit 17597cc

File tree

2 files changed

+46
-37
lines changed

2 files changed

+46
-37
lines changed

paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexer.java

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -35,12 +35,6 @@ public class TantivyFullTextGlobalIndexer implements GlobalIndexer {
3535
private final Map<String, ArchiveLayout> layoutCache = new ConcurrentHashMap<>();
3636
private final TantivySearcherPool searcherPool;
3737

38-
public TantivyFullTextGlobalIndexer() {
39-
this(
40-
new TantivySearcherPool(
41-
TantivyFullTextIndexOptions.SEARCHER_POOL_MAX_SIZE.defaultValue()));
42-
}
43-
4438
/**
 * Creates an indexer that keeps the given searcher pool in its {@code searcherPool} field.
 *
 * @param searcherPool the pool instance this indexer will hold; stored as-is, no copy is made
 */
public TantivyFullTextGlobalIndexer(TantivySearcherPool searcherPool) {
4539
this.searcherPool = searcherPool;
4640
}

paimon-tantivy/paimon-tantivy-index/src/test/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexTest.java

Lines changed: 46 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,10 @@
4141
import java.io.IOException;
4242
import java.util.Collections;
4343
import java.util.List;
44+
import java.util.Map;
4445
import java.util.Optional;
4546
import java.util.UUID;
47+
import java.util.concurrent.ConcurrentHashMap;
4648

4749
import static org.assertj.core.api.Assertions.assertThat;
4850
import static org.junit.jupiter.api.Assumptions.assumeTrue;
@@ -70,11 +72,15 @@ private static boolean isNativeAvailable() {
7072

7173
private FileIO fileIO;
7274
private Path indexPath;
75+
private Map<String, ArchiveLayout> layoutCache;
76+
private TantivySearcherPool pool;
7377

7478
/**
 * Rebuilds the test fixtures before every test: a local {@code FileIO}, an index path rooted at
 * {@code tempDir}, an empty layout cache, and a new searcher pool.
 */
@BeforeEach
7579
public void setup() {
7680
fileIO = new LocalFileIO();
7781
indexPath = new Path(tempDir.toString());
82+
// Fresh cache and pool per test, so no state carries over from one test method to the next.
layoutCache = new ConcurrentHashMap<>();
83+
// NOTE(review): 4 is presumably the pool's maximum size — confirm against TantivySearcherPool.
pool = new TantivySearcherPool(4);
7884
}
7985

8086
@AfterEach
@@ -111,6 +117,11 @@ private List<GlobalIndexIOMeta> toIOMetas(List<ResultEntry> results, Path path)
111117
new GlobalIndexIOMeta(filePath, fileIO.getFileSize(filePath), result.meta()));
112118
}
113119

120+
/**
 * Builds a reader over the given index files, wired to this test instance's {@code layoutCache}
 * and {@code pool} fields so that every reader created within one test shares the same cache and
 * searcher pool.
 *
 * @param fileReader file reader handed to the index reader
 * @param metas metadata of the index files to read
 * @return a new reader; callers in this class close it with try-with-resources
 */
private TantivyFullTextGlobalIndexReader createReader(
121+
GlobalIndexFileReader fileReader, List<GlobalIndexIOMeta> metas) {
122+
return new TantivyFullTextGlobalIndexReader(fileReader, metas, layoutCache, pool);
123+
}
124+
114125
@Test
115126
public void testEndToEnd() throws IOException {
116127
GlobalIndexFileWriter fileWriter = createFileWriter(indexPath);
@@ -127,12 +138,7 @@ public void testEndToEnd() throws IOException {
127138
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
128139
GlobalIndexFileReader fileReader = createFileReader();
129140

130-
try (TantivyFullTextGlobalIndexReader reader =
131-
new TantivyFullTextGlobalIndexReader(
132-
fileReader,
133-
metas,
134-
new java.util.concurrent.ConcurrentHashMap<>(),
135-
new TantivySearcherPool(4))) {
141+
try (TantivyFullTextGlobalIndexReader reader = createReader(fileReader, metas)) {
136142
FullTextSearch search = new FullTextSearch("paimon", 10, "text");
137143
Optional<ScoredGlobalIndexResult> searchResult = reader.visitFullTextSearch(search);
138144
assertThat(searchResult).isPresent();
@@ -164,12 +170,7 @@ public void testSearchNoResults() throws IOException {
164170
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
165171
GlobalIndexFileReader fileReader = createFileReader();
166172

167-
try (TantivyFullTextGlobalIndexReader reader =
168-
new TantivyFullTextGlobalIndexReader(
169-
fileReader,
170-
metas,
171-
new java.util.concurrent.ConcurrentHashMap<>(),
172-
new TantivySearcherPool(4))) {
173+
try (TantivyFullTextGlobalIndexReader reader = createReader(fileReader, metas)) {
173174
FullTextSearch search = new FullTextSearch("nonexistent", 10, "text");
174175
Optional<ScoredGlobalIndexResult> searchResult = reader.visitFullTextSearch(search);
175176
assertThat(searchResult).isPresent();
@@ -194,12 +195,7 @@ public void testNullFieldSkipped() throws IOException {
194195
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
195196
GlobalIndexFileReader fileReader = createFileReader();
196197

197-
try (TantivyFullTextGlobalIndexReader reader =
198-
new TantivyFullTextGlobalIndexReader(
199-
fileReader,
200-
metas,
201-
new java.util.concurrent.ConcurrentHashMap<>(),
202-
new TantivySearcherPool(4))) {
198+
try (TantivyFullTextGlobalIndexReader reader = createReader(fileReader, metas)) {
203199
FullTextSearch search = new FullTextSearch("paimon", 10, "text");
204200
Optional<ScoredGlobalIndexResult> searchResult = reader.visitFullTextSearch(search);
205201
assertThat(searchResult).isPresent();
@@ -243,12 +239,7 @@ public void testLargeDataset() throws IOException {
243239
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
244240
GlobalIndexFileReader fileReader = createFileReader();
245241

246-
try (TantivyFullTextGlobalIndexReader reader =
247-
new TantivyFullTextGlobalIndexReader(
248-
fileReader,
249-
metas,
250-
new java.util.concurrent.ConcurrentHashMap<>(),
251-
new TantivySearcherPool(4))) {
242+
try (TantivyFullTextGlobalIndexReader reader = createReader(fileReader, metas)) {
252243
// Search for the special keyword — should match every 10th doc
253244
FullTextSearch search = new FullTextSearch("special_keyword", 1000, "text");
254245
Optional<ScoredGlobalIndexResult> searchResult = reader.visitFullTextSearch(search);
@@ -277,12 +268,7 @@ public void testLimitRespected() throws IOException {
277268
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
278269
GlobalIndexFileReader fileReader = createFileReader();
279270

280-
try (TantivyFullTextGlobalIndexReader reader =
281-
new TantivyFullTextGlobalIndexReader(
282-
fileReader,
283-
metas,
284-
new java.util.concurrent.ConcurrentHashMap<>(),
285-
new TantivySearcherPool(4))) {
271+
try (TantivyFullTextGlobalIndexReader reader = createReader(fileReader, metas)) {
286272
// Limit to 5 results
287273
FullTextSearch search = new FullTextSearch("paimon", 5, "text");
288274
Optional<ScoredGlobalIndexResult> searchResult = reader.visitFullTextSearch(search);
@@ -293,9 +279,38 @@ public void testLimitRespected() throws IOException {
293279
}
294280
}
295281

282+
/**
 * Writes a two-document index, then runs the same "paimon" search through two readers that are
 * created and closed one after the other via {@code createReader}, so both use the same
 * {@code pool} and {@code layoutCache}. Both searches must return row 0; the second must return
 * exactly one row.
 *
 * <p>NOTE(review): the test only checks search results. It does not inspect the pool, so the
 * "pool miss" / "pool hit" behaviour described in the inline comments is assumed rather than
 * asserted here — confirm against TantivySearcherPool.
 */
@Test
283+
public void testPoolReuse() throws IOException {
284+
GlobalIndexFileWriter fileWriter = createFileWriter(indexPath);
285+
TantivyFullTextGlobalIndexWriter writer = new TantivyFullTextGlobalIndexWriter(fileWriter);
286+
writer.write(BinaryString.fromString("Apache Paimon streaming lake"));
287+
writer.write(BinaryString.fromString("Tantivy full-text search"));
288+
289+
List<ResultEntry> results = writer.finish();
290+
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
291+
GlobalIndexFileReader fileReader = createFileReader();
292+
// One search object is reused for both queries so the two runs are directly comparable.
FullTextSearch search = new FullTextSearch("paimon", 10, "text");
293+
294+
// First query: pool miss, searcher is loaded and returned to pool on close.
295+
try (TantivyFullTextGlobalIndexReader reader = createReader(fileReader, metas)) {
296+
Optional<ScoredGlobalIndexResult> result = reader.visitFullTextSearch(search);
297+
assertThat(result).isPresent();
298+
assertThat(result.get().results().contains(0L)).isTrue();
299+
}
300+
301+
// Second query: pool hit, reuses the same searcher — results must be identical.
302+
try (TantivyFullTextGlobalIndexReader reader = createReader(fileReader, metas)) {
303+
Optional<ScoredGlobalIndexResult> result = reader.visitFullTextSearch(search);
304+
assertThat(result).isPresent();
305+
// Only the first document contains "Paimon", so exactly one row (row 0) is expected.
assertThat(result.get().results().getLongCardinality()).isEqualTo(1);
306+
assertThat(result.get().results().contains(0L)).isTrue();
307+
}
308+
}
309+
296310
@Test
297311
public void testViaIndexer() throws IOException {
298-
TantivyFullTextGlobalIndexer indexer = new TantivyFullTextGlobalIndexer();
312+
TantivyFullTextGlobalIndexer indexer =
313+
new TantivyFullTextGlobalIndexer(new TantivySearcherPool(0));
299314

300315
GlobalIndexFileWriter fileWriter = createFileWriter(indexPath);
301316
TantivyFullTextGlobalIndexWriter writer =

0 commit comments

Comments
 (0)