1- import type { Book , MetadataStore , VectorStore } from "../core/types" ;
1+ import type { Book , MetadataStore , Volume , Shelf , VectorStore } from "../core/types" ;
22import type { ModelProfile } from "../core/ModelProfile" ;
33import { hashText } from "../core/crypto/hash" ;
44import type { KeyPair } from "../core/crypto/sign" ;
55import { EmbeddingRunner } from "../embeddings/EmbeddingRunner" ;
66import { chunkText } from "./Chunker" ;
77import { buildPage } from "./PageBuilder" ;
8- import { runPromotionSweep } from "../core/SalienceEngine" ;
98import { insertSemanticNeighbors } from "./FastNeighborInsert" ;
9+ import { buildHierarchy } from "./HierarchyBuilder" ;
1010
1111export interface IngestOptions {
1212 modelProfile : ModelProfile ;
@@ -19,46 +19,15 @@ export interface IngestOptions {
1919
2020export interface IngestResult {
2121 pages : Array < Awaited < ReturnType < typeof buildPage > > > ;
22- /** The single Book representing everything ingested by this call.
23- * One ingest call = one Book, always. All pages are members.
24- * A collection of Books becomes a Volume; a collection of Volumes
25- * becomes a Shelf — those tiers are assembled by the Daydreamer . */
22+ /** All Books produced by this ingest call. The hierarchy builder chunks
23+ * pages into books of up to PAGES_PER_BOOK and computes a medoid for each. */
24+ books : Book [ ] ;
25+ /** Convenience alias for `books[0]` — undefined when no pages were ingested . */
2626 book ?: Book ;
27- }
28-
/**
 * Cosine distance between two embedding vectors: `1 - cos(a, b)`.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same number of components as `a`.
 * @returns A distance in `[0, 2]` (0 = same direction, 1 = orthogonal,
 *   2 = opposite). If either vector has zero magnitude the direction is
 *   undefined and 0 is returned.
 * @throws RangeError if `a` and `b` differ in length. Without this check the
 *   out-of-range reads yield `undefined`, the result becomes `NaN`, and any
 *   `<` comparison against it is silently false.
 */
function cosineDistance(a: Float32Array, b: Float32Array): number {
  if (a.length !== b.length) {
    throw new RangeError(
      `cosineDistance: dimension mismatch (${a.length} vs ${b.length})`,
    );
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    const x = a[i];
    const y = b[i];
    dot += x * y;
    normA += x * x;
    normB += y * y;
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) return 0;

  // Floating-point rounding can push the cosine marginally outside [-1, 1]
  // (e.g. a vector compared with itself), which would produce a tiny negative
  // "distance". Clamp so the result is always a valid distance.
  return Math.min(2, Math.max(0, 1 - dot / denom));
}
42-
43- /**
44- * Selects the index of the medoid: the element that minimises total cosine
45- * distance to every other element in the set.
46- */
47- function selectMedoidIndex ( vectors : Float32Array [ ] ) : number {
48- if ( vectors . length === 1 ) return 0 ;
49- let bestIdx = 0 ;
50- let bestTotal = Infinity ;
51- for ( let i = 0 ; i < vectors . length ; i ++ ) {
52- let total = 0 ;
53- for ( let j = 0 ; j < vectors . length ; j ++ ) {
54- if ( i !== j ) total += cosineDistance ( vectors [ i ] , vectors [ j ] ) ;
55- }
56- if ( total < bestTotal ) {
57- bestTotal = total ;
58- bestIdx = i ;
59- }
60- }
61- return bestIdx ;
27+ /** Volumes produced by grouping books during hierarchy construction. */
28+ volumes : Volume [ ] ;
29+ /** Shelves produced by grouping volumes during hierarchy construction. */
30+ shelves : Shelf [ ] ;
6231}
6332
6433export async function ingestText (
@@ -76,7 +45,7 @@ export async function ingestText(
7645
7746 const chunks = chunkText ( text , modelProfile ) ;
7847 if ( chunks . length === 0 ) {
79- return { pages : [ ] , book : undefined } ;
48+ return { pages : [ ] , books : [ ] , book : undefined , volumes : [ ] , shelves : [ ] } ;
8049 }
8150
8251 const createdAt = new Date ( now ) . toISOString ( ) ;
@@ -124,23 +93,7 @@ export async function ingestText(
12493 } ) ;
12594 }
12695
127- // Build ONE Book for the entire ingest.
128- // A Book = the document we just ingested; its identity is the sorted set of
129- // its pages. Its representative is the page whose embedding is the medoid
130- // (minimum total cosine distance to all other pages in the document).
131- const medoidIdx = selectMedoidIndex ( embeddings ) ;
132- const sortedPageIds = [ ...pageIds ] . sort ( ) ;
133- const bookId = await hashText ( sortedPageIds . join ( "|" ) ) ;
134- const book : Book = {
135- bookId,
136- pageIds,
137- medoidPageId : pageIds [ medoidIdx ] ,
138- meta : { } ,
139- } ;
140- await metadataStore . putBook ( book ) ;
141-
14296 // Insert semantic neighbor edges for the new pages against all stored pages.
143- // Volumes and Shelves are assembled by the Daydreamer from accumulated Books.
14497 const allPages = await metadataStore . getAllPages ( ) ;
14598 const allPageIds = allPages . map ( ( p ) => p . pageId ) ;
14699 await insertSemanticNeighbors ( pageIds , allPageIds , {
@@ -149,8 +102,20 @@ export async function ingestText(
149102 metadataStore,
150103 } ) ;
151104
152- // Run hotpath promotion for the newly ingested pages and book.
153- await runPromotionSweep ( [ ...pageIds , bookId ] , metadataStore ) ;
105+ // Build the full hierarchy: Pages → Books → Volumes → Shelves.
106+ // buildHierarchy handles medoid selection, adjacency edges, prototype
107+ // computation, Williams fanout enforcement, and promotion sweeps.
108+ const hierarchy = await buildHierarchy ( pageIds , {
109+ modelProfile,
110+ vectorStore,
111+ metadataStore,
112+ } ) ;
154113
155- return { pages, book } ;
114+ return {
115+ pages,
116+ books : hierarchy . books ,
117+ book : hierarchy . books [ 0 ] ,
118+ volumes : hierarchy . volumes ,
119+ shelves : hierarchy . shelves ,
120+ } ;
156121}
0 commit comments