Skip to content

Commit 83f658e

Browse files
jordanhunt22 authored and Convex, Inc. committed
[Text Index Usage Tracking] Start tracking text index ingress (#44038)
Adds new fields in usage tracking for text index ingress + egress. Ingress tracking is now hooked up in the committer, and we count only the size of the indexed field, measured with an `estimated_size()` function. Text index egress is not tracked yet because it requires some additional changes to pipe the read size through from funrun, and we still have to decide how we want to bill the size of text index reads.

Follow-up items are to:
- [x] charge for text index size
- [ ] start tracking database egress

GitOrigin-RevId: 5c5df769795e78d79c57f7fedb272bb645bba3e5
1 parent 083120b commit 83f658e

File tree

18 files changed

+363
-119
lines changed

18 files changed

+363
-119
lines changed

crates/application/src/function_log.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,8 @@ impl FunctionExecution {
284284
storage_write_bytes: self.usage_stats.storage_write_bytes,
285285
vector_index_read_bytes: self.usage_stats.vector_index_read_bytes,
286286
vector_index_write_bytes: self.usage_stats.vector_index_write_bytes,
287+
text_index_read_bytes: self.usage_stats.text_index_read_bytes,
288+
text_index_write_bytes: self.usage_stats.text_index_write_bytes,
287289
memory_used_mb: self.memory_used_mb,
288290
return_bytes: self.return_bytes,
289291
},

crates/common/src/log_streaming.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ pub struct AggregatedFunctionUsageStats {
6767
pub storage_write_bytes: u64,
6868
pub vector_index_read_bytes: u64,
6969
pub vector_index_write_bytes: u64,
70+
pub text_index_read_bytes: u64,
71+
pub text_index_write_bytes: u64,
7072
pub memory_used_mb: u64,
7173
pub return_bytes: Option<u64>,
7274
}
@@ -974,6 +976,8 @@ mod tests {
974976
storage_write_bytes: 0,
975977
vector_index_read_bytes: 0,
976978
vector_index_write_bytes: 0,
979+
text_index_read_bytes: 0,
980+
text_index_write_bytes: 0,
977981
memory_used_mb: 0,
978982
return_bytes: Some(64),
979983
},

crates/database/src/committer.rs

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ use itertools::Itertools;
9393
use parking_lot::Mutex;
9494
use prometheus::VMHistogram;
9595
use rand::Rng;
96+
use search::TextIndexWriteSize;
9697
use tokio::sync::{
9798
mpsc::{
9899
self,
@@ -757,14 +758,15 @@ impl<RT: Runtime> Committer<RT> {
757758
.latest_snapshot()
758759
.unwrap_or_else(|| self.snapshot_manager.read().latest_snapshot());
759760
for &document_update in ordered_updates.iter() {
760-
let (updates, doc_in_vector_index) =
761+
let (updates, doc_in_vector_index, text_index_write_size) =
761762
latest_pending_snapshot.update(document_update, commit_ts)?;
762763
index_writes.extend(updates);
763764
document_writes.push(ValidatedDocumentWrite {
764765
commit_ts,
765766
id: document_update.id.into(),
766767
write: document_update.new_document.clone(),
767768
doc_in_vector_index,
769+
text_index_write_size,
768770
prev_ts: document_update.old_document.as_ref().map(|&(_, ts)| ts),
769771
});
770772
}
@@ -1039,6 +1041,7 @@ impl<RT: Runtime> Committer<RT> {
10391041
let ValidatedDocumentWrite {
10401042
write: document,
10411043
doc_in_vector_index,
1044+
text_index_write_size,
10421045
..
10431046
} = validated_write;
10441047
if let Some(document) = document {
@@ -1054,28 +1057,35 @@ impl<RT: Runtime> Committer<RT> {
10541057
.unwrap_or(ComponentPath::root());
10551058
if let Ok(table_name) = table_mapping.tablet_name(tablet_id) {
10561059
// Database bandwidth for document writes
1057-
if *doc_in_vector_index == DocInVectorIndex::Absent {
1058-
usage_tracker.track_database_ingress_size(
1060+
usage_tracker.track_database_ingress_size(
1061+
component_path.clone().clone(),
1062+
table_name.to_string(),
1063+
document_write_size as u64,
1064+
table_name.is_system(),
1065+
);
1066+
usage_tracker.track_database_ingress_size_v2(
1067+
component_path.clone(),
1068+
virtual_system_mapping
1069+
.system_to_virtual_table(&table_name)
1070+
.unwrap_or(&table_name)
1071+
.to_string(),
1072+
document_write_size as u64,
1073+
table_name.is_system()
1074+
&& !virtual_system_mapping.has_virtual_table(&table_name),
1075+
);
1076+
if *doc_in_vector_index == DocInVectorIndex::Present {
1077+
usage_tracker.track_vector_ingress_size(
10591078
component_path.clone(),
10601079
table_name.to_string(),
10611080
document_write_size as u64,
10621081
table_name.is_system(),
10631082
);
1064-
usage_tracker.track_database_ingress_size_v2(
1065-
component_path,
1066-
virtual_system_mapping
1067-
.system_to_virtual_table(&table_name)
1068-
.unwrap_or(&table_name)
1069-
.to_string(),
1070-
document_write_size as u64,
1071-
table_name.is_system()
1072-
&& !virtual_system_mapping.has_virtual_table(&table_name),
1073-
);
1074-
} else {
1075-
usage_tracker.track_vector_ingress_size(
1083+
}
1084+
if text_index_write_size.0 > 0 {
1085+
usage_tracker.track_text_ingress_size(
10761086
component_path,
10771087
table_name.to_string(),
1078-
document_write_size as u64,
1088+
text_index_write_size.0,
10791089
table_name.is_system(),
10801090
);
10811091
}
@@ -1118,6 +1128,7 @@ struct ValidatedDocumentWrite {
11181128
id: InternalDocumentId,
11191129
write: Option<ResolvedDocument>,
11201130
doc_in_vector_index: DocInVectorIndex,
1131+
text_index_write_size: TextIndexWriteSize,
11211132
prev_ts: Option<Timestamp>,
11221133
}
11231134

crates/database/src/snapshot_manager.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ use indexing::{
3131
backend_in_memory_indexes::BackendInMemoryIndexes,
3232
index_registry::IndexRegistry,
3333
};
34-
use search::TextIndexManager;
34+
use search::{
35+
TextIndexManager,
36+
TextIndexWriteSize,
37+
};
3538
use value::{
3639
ResolvedDocumentId,
3740
TableMapping,
@@ -220,7 +223,11 @@ impl Snapshot {
220223
&mut self,
221224
document_update: &impl DocumentUpdateRef,
222225
commit_ts: Timestamp,
223-
) -> anyhow::Result<(Vec<DatabaseIndexUpdate>, DocInVectorIndex)> {
226+
) -> anyhow::Result<(
227+
Vec<DatabaseIndexUpdate>,
228+
DocInVectorIndex,
229+
TextIndexWriteSize,
230+
)> {
224231
block_in_place(|| {
225232
let removal = document_update.old_document();
226233
let insertion = document_update.new_document();
@@ -268,7 +275,8 @@ impl Snapshot {
268275
insertion.cloned(),
269276
);
270277

271-
self.text_indexes
278+
let text_index_write_size = self
279+
.text_indexes
272280
.update(
273281
&self.index_registry,
274282
removal,
@@ -286,7 +294,11 @@ impl Snapshot {
286294
WriteTimestamp::Committed(commit_ts),
287295
)
288296
.context("Vector index update failed")?;
289-
Ok((in_memory_index_updates, doc_in_vector_index))
297+
Ok((
298+
in_memory_index_updates,
299+
doc_in_vector_index,
300+
text_index_write_size,
301+
))
290302
})
291303
}
292304

crates/database/src/tests/text_test_utils.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use common::{
3535
},
3636
version::MIN_NPM_VERSION_FOR_FUZZY_SEARCH,
3737
};
38+
use events::testing::TestUsageEventLogger;
3839
use futures::try_join;
3940
use maplit::btreeset;
4041
use must_let::must_let;
@@ -92,6 +93,7 @@ pub struct TextFixtures {
9293
pub storage: Arc<dyn Storage>,
9394
pub db: Database<TestRuntime>,
9495
pub reader: Arc<dyn PersistenceReader>,
96+
pub test_usage_logger: TestUsageEventLogger,
9597
searcher: Arc<dyn Searcher>,
9698
segment_term_metadata_fetcher: Arc<dyn SegmentTermMetadataFetcher>,
9799
writer: TextIndexMetadataWriter<TestRuntime>,
@@ -114,6 +116,7 @@ impl TextFixtures {
114116
db,
115117
search_storage,
116118
searcher,
119+
test_usage_logger,
117120
..
118121
} = DbFixtures::new_with_args(
119122
&rt,
@@ -144,6 +147,7 @@ impl TextFixtures {
144147
namespace: TableNamespace::test_user(),
145148
searcher,
146149
config,
150+
test_usage_logger,
147151
})
148152
}
149153

@@ -193,7 +197,7 @@ impl TextFixtures {
193197
)
194198
}
195199

196-
pub async fn enabled_text_index(&self) -> anyhow::Result<IndexData> {
200+
pub async fn enabled_text_index(&self) -> anyhow::Result<TextIndexData> {
197201
let index_data = self.backfilled_text_index().await?;
198202
let mut tx = self.db.begin_system().await?;
199203
IndexModel::new(&mut tx)
@@ -203,7 +207,7 @@ impl TextFixtures {
203207
Ok(index_data)
204208
}
205209

206-
pub async fn backfilled_text_index(&self) -> anyhow::Result<IndexData> {
210+
pub async fn backfilled_text_index(&self) -> anyhow::Result<TextIndexData> {
207211
let index_data = self.insert_backfilling_text_index().await?;
208212
self.backfill().await?;
209213

@@ -249,7 +253,7 @@ impl TextFixtures {
249253
Ok(ts)
250254
}
251255

252-
pub async fn insert_backfilling_text_index(&self) -> anyhow::Result<IndexData> {
256+
pub async fn insert_backfilling_text_index(&self) -> anyhow::Result<TextIndexData> {
253257
let mut tx = self.db.begin_system().await?;
254258
let index_metadata = backfilling_text_index()?;
255259
let index_name = &index_metadata.name;
@@ -264,15 +268,17 @@ impl TextFixtures {
264268
self.db.commit(tx).await?;
265269

266270
let resolved_index_name = TabletIndexName::new(table_id, index_name.descriptor().clone())?;
267-
Ok(IndexData {
271+
Ok(TextIndexData {
268272
index_id,
269273
resolved_index_name,
270274
index_name: index_name.clone(),
271275
namespace: self.namespace,
272276
})
273277
}
274278

275-
pub async fn insert_backfilling_text_index_with_document(&self) -> anyhow::Result<IndexData> {
279+
pub async fn insert_backfilling_text_index_with_document(
280+
&self,
281+
) -> anyhow::Result<TextIndexData> {
276282
let index_data = self.insert_backfilling_text_index().await?;
277283
let mut tx = self.db.begin_system().await?;
278284
add_document(&mut tx, index_data.index_name.table(), "A long text field").await?;
@@ -404,7 +410,7 @@ impl TextFixtures {
404410
const TABLE_NAME: &str = "table";
405411
const SEARCH_FIELD: &str = "text";
406412

407-
pub struct IndexData {
413+
pub struct TextIndexData {
408414
pub index_id: ResolvedDocumentId,
409415
pub index_name: IndexName,
410416
pub resolved_index_name: TabletIndexName,

0 commit comments

Comments
 (0)