Skip to content

Commit 91b10f4

Browse files
Copilot and devlux76 committed
fix: eliminate double IDB reads in hierarchical query routing, relax book membership assertion
Ranking functions (rankShelves/rankVolumes/rankBooks/rankPages/spillToWarm) now return RankedResult { id, score, childIds } so Query.ts can use child IDs from the ranking pass directly instead of re-fetching entities from IDB.

The test assertion for page-to-book membership is relaxed from "exactly one" to "at least one" per user feedback — pages can legitimately appear in multiple books (e.g., legal corpora with cross-citations).

Co-authored-by: devlux76 <[email protected]>
1 parent 4335f67 commit 91b10f4

File tree

3 files changed

+33
-35
lines changed

3 files changed

+33
-35
lines changed

lib/cortex/Query.ts

Lines changed: 5 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ import type { EmbeddingRunner } from "../embeddings/EmbeddingRunner";
44
import { runPromotionSweep } from "../core/SalienceEngine";
55
import { computeSubgraphBounds } from "../core/HotpathPolicy";
66
import type { QueryResult } from "./QueryResult";
7-
import { rankPages, rankBooks, rankVolumes, rankShelves, spillToWarm } from "./Ranking";
7+
import { rankPages, rankBooks, rankVolumes, rankShelves, spillToWarm, type RankedResult } from "./Ranking";
88
import { buildMetroid } from "./MetroidBuilder";
99
import { detectKnowledgeGap } from "./KnowledgeGapDetector";
1010
import { solveOpenTSP } from "./OpenTSPSolver";
@@ -64,10 +64,7 @@ export async function query(
6464
rankingOptions,
6565
);
6666
for (const s of topShelves) {
67-
const shelf = await metadataStore.getShelf(s.id);
68-
if (shelf) {
69-
for (const vid of shelf.volumeIds) volumeIdsFromShelves.add(vid);
70-
}
67+
for (const vid of s.childIds) volumeIdsFromShelves.add(vid);
7168
}
7269
}
7370

@@ -86,10 +83,7 @@ export async function query(
8683
rankingOptions,
8784
);
8885
for (const v of topVolumes) {
89-
const volume = await metadataStore.getVolume(v.id);
90-
if (volume) {
91-
for (const bid of volume.bookIds) bookIdsFromVolumes.add(bid);
92-
}
86+
for (const bid of v.childIds) bookIdsFromVolumes.add(bid);
9387
}
9488
}
9589

@@ -108,10 +102,7 @@ export async function query(
108102
rankingOptions,
109103
);
110104
for (const b of topBooks) {
111-
const book = await metadataStore.getBook(b.id);
112-
if (book) {
113-
for (const pid of book.pageIds) pageIdsFromBooks.add(pid);
114-
}
105+
for (const pid of b.childIds) pageIdsFromBooks.add(pid);
115106
}
116107
}
117108

@@ -123,7 +114,7 @@ export async function query(
123114
const seenIds = new Set(hotResults.map((r) => r.id));
124115

125116
// --- Warm spill: fill up to topK if hot path is insufficient ---
126-
let warmResults: Array<{ id: Hash; score: number }> = [];
117+
let warmResults: RankedResult[] = [];
127118
if (hotResults.length < topK) {
128119
const allWarm = await spillToWarm("page", queryEmbedding, topK, rankingOptions);
129120
warmResults = allWarm.filter((r) => !seenIds.has(r.id));

lib/cortex/Ranking.ts

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,34 +21,42 @@ function cosineSimilarity(a: Float32Array, b: Float32Array): number {
2121
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
2222
}
2323

24+
export interface RankedResult {
25+
id: Hash;
26+
score: number;
27+
/** Child IDs from the ranked entity (volumeIds / bookIds / pageIds). */
28+
childIds: Hash[];
29+
}
30+
2431
function pickTopK(
25-
scored: Array<{ id: Hash; score: number }>,
32+
scored: RankedResult[],
2633
k: number,
27-
): Array<{ id: Hash; score: number }> {
34+
): RankedResult[] {
2835
scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
2936
return scored.slice(0, k);
3037
}
3138

3239
/**
3340
* Ranks shelves by cosine similarity of their routing prototype to the query.
3441
* Uses routingPrototypeOffsets[0] as the representative vector.
42+
* Returns child volumeIds alongside each scored shelf.
3543
*/
3644
export async function rankShelves(
3745
queryEmbedding: Float32Array,
3846
residentShelfIds: Hash[],
3947
topK: number,
4048
options: RankingOptions,
41-
): Promise<Array<{ id: Hash; score: number }>> {
49+
): Promise<RankedResult[]> {
4250
if (residentShelfIds.length === 0) return [];
4351

4452
const { vectorStore, metadataStore } = options;
45-
const scored: Array<{ id: Hash; score: number }> = [];
53+
const scored: RankedResult[] = [];
4654

4755
for (const shelfId of residentShelfIds) {
4856
const shelf = await metadataStore.getShelf(shelfId);
4957
if (!shelf || shelf.routingPrototypeOffsets.length === 0) continue;
5058
const vec = await vectorStore.readVector(shelf.routingPrototypeOffsets[0], shelf.routingDim);
51-
scored.push({ id: shelfId, score: cosineSimilarity(queryEmbedding, vec) });
59+
scored.push({ id: shelfId, score: cosineSimilarity(queryEmbedding, vec), childIds: shelf.volumeIds });
5260
}
5361

5462
return pickTopK(scored, topK);
@@ -57,49 +65,51 @@ export async function rankShelves(
5765
/**
5866
* Ranks volumes by cosine similarity of their first prototype to the query.
5967
* Uses prototypeOffsets[0] as the representative vector.
68+
* Returns child bookIds alongside each scored volume.
6069
*/
6170
export async function rankVolumes(
6271
queryEmbedding: Float32Array,
6372
residentVolumeIds: Hash[],
6473
topK: number,
6574
options: RankingOptions,
66-
): Promise<Array<{ id: Hash; score: number }>> {
75+
): Promise<RankedResult[]> {
6776
if (residentVolumeIds.length === 0) return [];
6877

6978
const { vectorStore, metadataStore } = options;
70-
const scored: Array<{ id: Hash; score: number }> = [];
79+
const scored: RankedResult[] = [];
7180

7281
for (const volumeId of residentVolumeIds) {
7382
const volume = await metadataStore.getVolume(volumeId);
7483
if (!volume || volume.prototypeOffsets.length === 0) continue;
7584
const vec = await vectorStore.readVector(volume.prototypeOffsets[0], volume.prototypeDim);
76-
scored.push({ id: volumeId, score: cosineSimilarity(queryEmbedding, vec) });
85+
scored.push({ id: volumeId, score: cosineSimilarity(queryEmbedding, vec), childIds: volume.bookIds });
7786
}
7887

7988
return pickTopK(scored, topK);
8089
}
8190

8291
/**
8392
* Ranks books by cosine similarity of their medoid page embedding to the query.
93+
* Returns child pageIds alongside each scored book.
8494
*/
8595
export async function rankBooks(
8696
queryEmbedding: Float32Array,
8797
residentBookIds: Hash[],
8898
topK: number,
8999
options: RankingOptions,
90-
): Promise<Array<{ id: Hash; score: number }>> {
100+
): Promise<RankedResult[]> {
91101
if (residentBookIds.length === 0) return [];
92102

93103
const { vectorStore, metadataStore } = options;
94-
const scored: Array<{ id: Hash; score: number }> = [];
104+
const scored: RankedResult[] = [];
95105

96106
for (const bookId of residentBookIds) {
97107
const book = await metadataStore.getBook(bookId);
98108
if (!book) continue;
99109
const medoidPage = await metadataStore.getPage(book.medoidPageId);
100110
if (!medoidPage) continue;
101111
const vec = await vectorStore.readVector(medoidPage.embeddingOffset, medoidPage.embeddingDim);
102-
scored.push({ id: bookId, score: cosineSimilarity(queryEmbedding, vec) });
112+
scored.push({ id: bookId, score: cosineSimilarity(queryEmbedding, vec), childIds: book.pageIds });
103113
}
104114

105115
return pickTopK(scored, topK);
@@ -113,17 +123,17 @@ export async function rankPages(
113123
residentPageIds: Hash[],
114124
topK: number,
115125
options: RankingOptions,
116-
): Promise<Array<{ id: Hash; score: number }>> {
126+
): Promise<RankedResult[]> {
117127
if (residentPageIds.length === 0) return [];
118128

119129
const { vectorStore, metadataStore } = options;
120-
const scored: Array<{ id: Hash; score: number }> = [];
130+
const scored: RankedResult[] = [];
121131

122132
for (const pageId of residentPageIds) {
123133
const page = await metadataStore.getPage(pageId);
124134
if (!page) continue;
125135
const vec = await vectorStore.readVector(page.embeddingOffset, page.embeddingDim);
126-
scored.push({ id: pageId, score: cosineSimilarity(queryEmbedding, vec) });
136+
scored.push({ id: pageId, score: cosineSimilarity(queryEmbedding, vec), childIds: [] });
127137
}
128138

129139
return pickTopK(scored, topK);
@@ -139,17 +149,17 @@ export async function spillToWarm(
139149
queryEmbedding: Float32Array,
140150
topK: number,
141151
options: RankingOptions,
142-
): Promise<Array<{ id: Hash; score: number }>> {
152+
): Promise<RankedResult[]> {
143153
if (tier !== "page") return [];
144154

145155
const { vectorStore, metadataStore } = options;
146156
const allPages = await metadataStore.getAllPages();
147157
if (allPages.length === 0) return [];
148158

149-
const scored: Array<{ id: Hash; score: number }> = [];
159+
const scored: RankedResult[] = [];
150160
for (const page of allPages) {
151161
const vec = await vectorStore.readVector(page.embeddingOffset, page.embeddingDim);
152-
scored.push({ id: page.pageId, score: cosineSimilarity(queryEmbedding, vec) });
162+
scored.push({ id: page.pageId, score: cosineSimilarity(queryEmbedding, vec), childIds: [] });
153163
}
154164

155165
return pickTopK(scored, topK);

tests/integration/IngestQuery.test.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -424,14 +424,11 @@ describe("integration (v0.5): hierarchical and dialectical ingest/query", () =>
424424
expect(result.books.length).toBeGreaterThanOrEqual(1);
425425
expect(result.book).toBeDefined();
426426

427-
// Every page must belong to exactly one book
427+
// Every page must belong to at least one book
428428
const allBookPageIds = result.books.flatMap((b) => b.pageIds);
429429
for (const page of result.pages) {
430430
expect(allBookPageIds).toContain(page.pageId);
431431
}
432-
// Enforce exactly-once membership (no page duplicated across books)
433-
const uniqueBookPageIds = new Set(allBookPageIds);
434-
expect(uniqueBookPageIds.size).toBe(allBookPageIds.length);
435432
// Every book's medoid must be one of its own pages
436433
for (const book of result.books) {
437434
const storedBook = await metadataStore.getBook(book.bookId);

0 commit comments

Comments
 (0)