Skip to content

Commit 6115889

Browse files
authored
feat: stream flight sql queries (#645)
With this change it is now possible to query _stream variant tables that produce an unbounded stream of new data as it arrives in the tables.
1 parent 761bc8a commit 6115889

File tree

37 files changed

+2538
-899
lines changed

37 files changed

+2538
-899
lines changed

Cargo.lock

Lines changed: 7 additions & 33 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ members = [
77
"anchor-service",
88
"api",
99
"api-server",
10-
"arrow-test",
1110
"core",
1211
"event",
1312
"event-svc",
@@ -69,8 +68,7 @@ ceramic-actor-macros = { path = "./actor-macros" }
6968
ceramic-anchor-service = { path = "./anchor-service" }
7069
ceramic-anchor-remote = { path = "./anchor-remote" }
7170
ceramic-api = { path = "./api" }
72-
ceramic-api-server = { path = "./api-server" }
73-
ceramic-arrow-test = { path = "./arrow-test" }
71+
ceramic-api-server = { path = "./api-server", default-features = false }
7472
ceramic-car = { path = "./car" }
7573
ceramic-core = { path = "./core" }
7674
ceramic-event = { path = "./event" }
@@ -176,6 +174,7 @@ ring = "0.17.8"
176174
rkyv = "0.7.37"
177175
rlimit = "0.9.0"
178176
ruzstd = "0.3"
177+
schemars = "0.8"
179178
serde = { version = "1.0", features = ["derive"] }
180179
serde-error = "0.1.2"
181180
serde_bytes = "0.11"
@@ -187,9 +186,9 @@ serde_qs = "0.10.1"
187186
serde_with = "2.1"
188187
sha2 = { version = "0.10", default-features = false }
189188
sha3 = "0.10"
190-
snafu = "0.8.5"
191189
shutdown = { path = "./shutdown/" }
192190
smallvec = "1.10"
191+
snafu = "0.8.5"
193192
# pragma optimize hangs forever on 0.8, possibly due to libsqlite-sys upgrade
194193
sqlx = { version = "0.7", features = ["sqlite", "runtime-tokio", "chrono"] }
195194
ssh-key = { version = "0.5.1", default-features = false, features = [
@@ -268,3 +267,6 @@ repository = "https://github.com/3box/rust-ceramic"
268267
inherits = "release"
269268
debug = true
270269
strip = "none"
270+
271+
[patch.crates-io]
272+
#datafusion = { path = "../datafusion/datafusion/core" }

api-server/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ edition = "2018"
99
[features]
1010
default = ["client", "server"]
1111
client = [
12-
"hyper", "hyper-openssl", "hyper-tls", "native-tls", "openssl", "url"
13-
]
12+
"hyper", "hyper-openssl", "hyper-tls", "native-tls", "openssl", "url",
13+
"percent-encoding"]
1414
server = [
1515
"serde_ignored", "hyper", "regex", "percent-encoding", "url", "lazy_static"
1616
]

api-server/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ To see how to make this your own, look here:
1515
[README](https://openapi-generator.tech)
1616

1717
- API version: 0.47.3
18-
- Build date: 2025-01-13T18:59:21.238278812Z[Etc/UTC]
18+
- Build date: 2025-01-14T10:24:44.041460427-07:00[America/Denver]
1919

2020

2121

api/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ publish = false
1111
[dependencies]
1212
anyhow.workspace = true
1313
async-trait.workspace = true
14+
ceramic-actor.workspace = true
1415
ceramic-api-server.workspace = true
1516
ceramic-core.workspace = true
1617
ceramic-event.workspace = true
@@ -43,3 +44,4 @@ expect-test.workspace = true
4344
test-log.workspace = true
4445
mockall.workspace = true
4546
object_store.workspace = true
47+
ceramic-pipeline = { workspace = true, features = ["mock"] }

api/src/server.rs

Lines changed: 41 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use std::{
2222

2323
use anyhow::Result;
2424
use async_trait::async_trait;
25+
use ceramic_actor::ActorHandle as _;
2526
use ceramic_api_server::models::{BadRequestResponse, ErrorResponse, EventData, Peer, Peers};
2627
use ceramic_api_server::{
2728
models::{self, Event},
@@ -37,7 +38,8 @@ use ceramic_api_server::{
3738
use ceramic_core::{
3839
ensure_multiaddr_has_p2p, Cid, EventId, Interest, Network, NodeId, PeerId, StreamId,
3940
};
40-
use ceramic_pipeline::EVENT_STATES_TABLE;
41+
use ceramic_pipeline::aggregator::{AggregatorHandle, StreamStateMsg};
42+
use ceramic_pipeline::PipelineHandle;
4143
use datafusion::arrow::array::{
4244
as_dictionary_array, as_map_array, Array as _, ArrayAccessor as _, BinaryArray,
4345
};
@@ -386,7 +388,7 @@ pub struct Server<C, I, M, P> {
386388
marker: PhantomData<C>,
387389
authentication: bool,
388390

389-
pipeline: Option<SessionContext>,
391+
pipeline: Option<PipelineHandle>,
390392
}
391393

392394
impl<C, I, M, P> Server<C, I, M, P>
@@ -401,7 +403,7 @@ where
401403
interest: I,
402404
model: Arc<M>,
403405
p2p: P,
404-
pipeline: Option<SessionContext>,
406+
pipeline: Option<PipelineHandle>,
405407
shutdown_signal: Shutdown,
406408
) -> Self {
407409
let (tx, event_rx) = tokio::sync::mpsc::channel::<EventInsert>(1024);
@@ -876,130 +878,47 @@ where
876878

877879
async fn get_stream_state(
878880
&self,
879-
pipeline: &SessionContext,
881+
aggregator: AggregatorHandle,
880882
stream_id: StreamId,
881883
) -> Result<ceramic_api_server::StreamsStreamIdGetResponse, ErrorResponse> {
882-
let state_batch = pipeline
883-
.table(EVENT_STATES_TABLE)
884-
.await
885-
.map_err(|err| {
886-
ErrorResponse::new(format!("{} table not found: {err}", EVENT_STATES_TABLE))
887-
})?
888-
.select(vec![
889-
col("stream_cid"),
890-
col("event_cid"),
891-
col("dimensions"),
892-
col("controller"),
893-
col("data"),
894-
col("index"),
895-
])
896-
.map_err(|err| ErrorResponse::new(format!("failed to select: {err}")))?
897-
.filter(col("stream_cid").eq(lit(stream_id.cid.to_bytes())))
898-
.map_err(|err| ErrorResponse::new(format!("failed to filter: {err}")))?
899-
.aggregate(
900-
vec![col("stream_cid"), col("controller")],
901-
vec![
902-
last_value(vec![col("data")])
903-
.order_by(vec![col("index").sort(true, true)])
904-
.build()
905-
.map_err(|err| {
906-
ErrorResponse::new(format!(
907-
"failed to define last_value state query: {err}"
908-
))
909-
})?
910-
.alias("data"),
911-
last_value(vec![col("event_cid")])
912-
.order_by(vec![col("index").sort(true, true)])
913-
.build()
914-
.map_err(|err| {
915-
ErrorResponse::new(format!(
916-
"failed to define last_value event_cid query: {err}"
917-
))
918-
})?
919-
.alias("event_cid"),
920-
last_value(vec![col("dimensions")])
921-
.order_by(vec![col("index").sort(true, true)])
922-
.build()
923-
.map_err(|err| {
924-
ErrorResponse::new(format!(
925-
"failed to define last_value event_cid dimensions: {err}"
926-
))
927-
})?
928-
.alias("dimensions"),
929-
],
930-
)
931-
.map_err(|err| ErrorResponse::new(format!("failed to define window: {err}")))?
932-
.collect()
884+
let state = aggregator
885+
.send(StreamStateMsg {
886+
id: stream_id.clone(),
887+
})
933888
.await
934-
.map_err(|err| {
935-
ErrorResponse::new(format!("failed to execute pipeline query: {err}"))
936-
})?;
937-
938-
if state_batch.is_empty() {
939-
return Ok(
889+
.map_err(|err| ErrorResponse::new(err.to_string()))?
890+
.map_err(|err| ErrorResponse::new(err.to_string()))?;
891+
if let Some(state) = state {
892+
Ok(ceramic_api_server::StreamsStreamIdGetResponse::Success(
893+
models::StreamState {
894+
id: state.id.to_string(),
895+
event_cid: state.event_cid.to_string(),
896+
controller: state.controller,
897+
dimensions: serde_json::Value::Object(
898+
state
899+
.dimensions
900+
.into_iter()
901+
.map(|(k, v)| {
902+
(
903+
k,
904+
serde_json::Value::String(multibase::encode(
905+
multibase::Base::Base64Url,
906+
v,
907+
)),
908+
)
909+
})
910+
.collect(),
911+
),
912+
data: multibase::encode(multibase::Base::Base64Url, state.data),
913+
},
914+
))
915+
} else {
916+
Ok(
940917
ceramic_api_server::StreamsStreamIdGetResponse::StreamNotFound(
941918
stream_id.to_string(),
942919
),
943-
);
944-
}
945-
946-
let batch = concat_batches(&state_batch[0].schema(), state_batch.iter())
947-
.map_err(|err| ErrorResponse::new(format!("failed to concat batches: {err}")))?;
948-
let data = as_binary_array(
949-
batch
950-
.column_by_name("data")
951-
.ok_or_else(|| ErrorResponse::new("state column should exist".to_string()))?,
952-
)
953-
.map_err(|err| ErrorResponse::new(format!("state should be a string column: {err}")))?
954-
.value(0);
955-
let event_cid = as_binary_array(
956-
batch
957-
.column_by_name("event_cid")
958-
.ok_or_else(|| ErrorResponse::new("event_cid column should exist".to_string()))?,
959-
)
960-
.map_err(|err| ErrorResponse::new(format!("event_cid should be a binary column: {err}")))?;
961-
let controller = as_string_array(
962-
batch
963-
.column_by_name("controller")
964-
.ok_or_else(|| ErrorResponse::new("controller column should exist".to_string()))?,
965-
)
966-
.map_err(|err| {
967-
ErrorResponse::new(format!("controller should be a string column: {err}"))
968-
})?;
969-
let dimensions = as_map_array(
970-
batch
971-
.column_by_name("dimensions")
972-
.ok_or_else(|| ErrorResponse::new("dimensions column should exist".to_string()))?,
973-
);
974-
let keys = as_string_array(dimensions.keys()).map_err(|err| {
975-
ErrorResponse::new(format!("dimensions keys should be strings: {err}"))
976-
})?;
977-
let values = as_dictionary_array::<Int32Type>(dimensions.values())
978-
.downcast_dict::<BinaryArray>()
979-
.ok_or_else(|| ErrorResponse::new("dimensions values should be binary".to_string()))?;
980-
let mut dimensions = serde_json::Map::with_capacity(keys.len());
981-
for i in 0..keys.len() {
982-
let key = keys.value(i);
983-
let value = values.value(i);
984-
dimensions.insert(
985-
key.to_string(),
986-
serde_json::Value::String(multibase::encode(multibase::Base::Base64Url, value)),
987-
);
920+
)
988921
}
989-
990-
Ok(ceramic_api_server::StreamsStreamIdGetResponse::Success(
991-
models::StreamState {
992-
id: stream_id.to_string(),
993-
event_cid: Cid::read_bytes(event_cid.value(0))
994-
.map_err(|err| {
995-
ErrorResponse::new(format!("event_cid should be valid cid: {err}"))
996-
})?
997-
.to_string(),
998-
controller: controller.value(0).to_string(),
999-
dimensions: serde_json::Value::Object(dimensions),
1000-
data: multibase::encode(multibase::Base::Base64Url, data),
1001-
},
1002-
))
1003922
}
1004923
async fn get_peers(&self) -> Result<PeersGetResponse, ErrorResponse> {
1005924
let peers =
@@ -1260,8 +1179,8 @@ where
12601179
))
12611180
}
12621181
};
1263-
if let Some(pipeline) = &self.pipeline {
1264-
self.get_stream_state(pipeline, stream_id)
1182+
if let Some(aggregator) = self.pipeline.as_ref().and_then(|p| p.aggregator()) {
1183+
self.get_stream_state(aggregator, stream_id)
12651184
.await
12661185
.or_else(|err| {
12671186
Ok(ceramic_api_server::StreamsStreamIdGetResponse::InternalServerError(err))

0 commit comments

Comments
 (0)