From 6222a69f3cc3ac4e826ff161ee8e89feb9ba0f67 Mon Sep 17 00:00:00 2001 From: Sean Griffin Date: Mon, 8 Jun 2026 09:04:12 -0600 Subject: [PATCH 1/4] Do not define total ordering for Datum Datum has had total ordering defined since it was originally introduced in 000b4cd7b. There's no context given for why, but I'm willing to assume the answer is "because it could be derived". At one point the implementation moved from derived to manually defined with identical semantics (in a commit written by claude that has no reasoning in the commit message, so I assume wasn't thought out). The reason this is possible is because PG does define total ordering for each of its data types. The only data type for which this isn't obviously the case is floats, and PG defines all NaNs as equal and greater than non NaNs. Rust then also defines what ordering for differing enum variants means in its `PartialOrd` derive, which primarily orders on discriminant. In both `PartialEq` and `PartialOrd`, we diverge from PG's behavior in that we do not ever consider cross type comparisons, while PG does have several opclasses that do not require the lhs and rhs to be the same type. This is *probably* fine, as I believe the only we could ever end up comparing `Datum`s of differing types expecting a meaningful answer are: - The datum variant of a column changes across rows (impossible) - The datum variant of a column changes across shards (should be impossible) - We are blindly comparing datums from two columns that may have differing types or against a hard coded value and didn't consider this (unlikely, but possible) The last case is a bit of a footgun, but one that I'm comfortable enough with to not immediately go have us try to perfectly match PG's semantics with. `1::int4 != 1.0::real` is not necessarily ideal, but is at least a logical answer and one that is very hard for us to reach in our code. Leaving `PartialOrd` alone, however, makes me much less comfortable. The semantics of the derived implementation would mean `1::int4 > 2.0::real`, which is much more obviously wrong. And were that footgun to ever go off, it's feasible that it wouldn't be caught by a test, unless the code comparing datums considers every possible way lhs and rhs could differ in type. Because of that, I have manually implemented `PartialOrd` to explicitly return `None` when the types differ, along with any comparison with `Null` returning `None` to reflect that PG returns `NULL` in that case. I opted not to have this reflect the behavior of PG's `ORDER BY`, which is `NULLS FIRST` by default, as that code handles NULLs explicitly, and I would expect the behavior of `PartialOrd` to match the `<` operator in SQL. The `PartialOrd` impl was written in this more verbose way, rather than with a single `_ if discriminant(self) != discriminant(other)` so that any variants added to `Datum` in the future will fail to compile if they are not handled in the impl. (Hilariously, this meant I couldn't write `(Null, _) | (_, Null)` in the last arm to match the shape of the others, as the compiler correclty points out that `(_, Null)` is redundant as we've already handled every other possible type on the left explicitly) I have left the `Eq` impl alone since returning `false` is a reasonable answer for differing types, and the only requirement that Rust defines for `Eq` is that `a == a` which is the case. --- pgdog-postgres-types/src/array.rs | 2 +- pgdog-postgres-types/src/datum.rs | 45 ++++++++++++++++++- .../src/backend/pool/connection/aggregate.rs | 2 +- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/pgdog-postgres-types/src/array.rs b/pgdog-postgres-types/src/array.rs index 402ecb1bd..b7e865d21 100644 --- a/pgdog-postgres-types/src/array.rs +++ b/pgdog-postgres-types/src/array.rs @@ -3,7 +3,7 @@ use bytes::{Buf, BufMut, Bytes, BytesMut}; use super::{Error, Format}; use crate::{DataType, Datum}; -#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialOrd, PartialEq, Eq, Hash)] pub struct Array { elements: Vec>, element_oid: i32, diff --git a/pgdog-postgres-types/src/datum.rs b/pgdog-postgres-types/src/datum.rs index 8de553da7..9dacec2b2 100644 --- a/pgdog-postgres-types/src/datum.rs +++ b/pgdog-postgres-types/src/datum.rs @@ -1,3 +1,4 @@ +use std::cmp::{Ordering, PartialOrd}; use std::ops::Add; use bytes::Bytes; @@ -9,7 +10,7 @@ use crate::{ TimestampTz, ToDataRowColumn, }; -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Datum { /// BIGINT. Bigint(i64), @@ -47,6 +48,48 @@ pub enum Datum { Boolean(bool), } +impl PartialOrd for Datum { + fn partial_cmp(&self, other: &Datum) -> Option { + use Datum::*; + + match (self, other) { + (Bigint(a), Bigint(b)) => a.partial_cmp(b), + (Bigint(_), _) | (_, Bigint(_)) => None, + (Integer(a), Integer(b)) => a.partial_cmp(b), + (Integer(_), _) | (_, Integer(_)) => None, + (SmallInt(a), SmallInt(b)) => a.partial_cmp(b), + (SmallInt(_), _) | (_, SmallInt(_)) => None, + (Interval(a), Interval(b)) => a.partial_cmp(b), + (Interval(_), _) | (_, Interval(_)) => None, + (Text(a), Text(b)) => a.partial_cmp(b), + (Text(_), _) | (_, Text(_)) => None, + (Timestamp(a), Timestamp(b)) => a.partial_cmp(b), + (Timestamp(_), _) | (_, Timestamp(_)) => None, + (TimestampTz(a), TimestampTz(b)) => a.partial_cmp(b), + (TimestampTz(_), _) | (_, TimestampTz(_)) => None, + (Uuid(a), Uuid(b)) => a.partial_cmp(b), + (Uuid(_), _) | (_, Uuid(_)) => None, + (Numeric(a), Numeric(b)) => a.partial_cmp(b), + (Numeric(_), _) | (_, Numeric(_)) => None, + (Float(a), Float(b)) => a.partial_cmp(b), + (Float(_), _) | (_, Float(_)) => None, + (Double(a), Double(b)) => a.partial_cmp(b), + (Double(_), _) | (_, Double(_)) => None, + (Vector(a), Vector(b)) => a.partial_cmp(b), + (Vector(_), _) | (_, Vector(_)) => None, + (Oid(a), Oid(b)) => a.partial_cmp(b), + (Oid(_), _) | (_, Oid(_)) => None, + (Array(a), Array(b)) => a.partial_cmp(b), + (Array(_), _) | (_, Array(_)) => None, + (Unknown(a), Unknown(b)) => a.partial_cmp(b), + (Unknown(_), _) | (_, Unknown(_)) => None, + (Boolean(a), Boolean(b)) => a.partial_cmp(b), + (Boolean(_), _) | (_, Boolean(_)) => None, + (Null, _) => None, + } + } +} + impl ToDataRowColumn for Datum { fn to_data_row_column(&self) -> Data { use Datum::*; diff --git a/pgdog/src/backend/pool/connection/aggregate.rs b/pgdog/src/backend/pool/connection/aggregate.rs index 1e070104f..1ec90c04b 100644 --- a/pgdog/src/backend/pool/connection/aggregate.rs +++ b/pgdog/src/backend/pool/connection/aggregate.rs @@ -21,7 +21,7 @@ use rust_decimal::Decimal; use super::Error; /// GROUP BY -#[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[derive(Hash, PartialEq, Eq, PartialOrd, Debug)] struct Grouping { columns: Vec<(usize, Datum)>, } From 5b61a26e39a3c2852a0af3532e743bad07280164 Mon Sep 17 00:00:00 2001 From: Sean Griffin Date: Mon, 8 Jun 2026 10:50:23 -0600 Subject: [PATCH 2/4] Handle NULLS in ORDER BY, fix a test that was testing nothing I reversed the order in that test because it seems like it cares about how we compare NULL with certain timestamps, and not that we support NULLS FIRST|LAST specifically --- .../tests/integration/timestamp_sorting.rs | 11 ++- pgdog/src/backend/pool/connection/buffer.rs | 82 ++++++++++--------- 2 files changed, 49 insertions(+), 44 deletions(-) diff --git a/integration/rust/tests/integration/timestamp_sorting.rs b/integration/rust/tests/integration/timestamp_sorting.rs index 591ea1cb1..446e6a730 100644 --- a/integration/rust/tests/integration/timestamp_sorting.rs +++ b/integration/rust/tests/integration/timestamp_sorting.rs @@ -120,18 +120,17 @@ async fn test_timestamp_sorting_across_shards() { .unwrap(); let rows = sharded_conn - .fetch_all( - "SELECT id, name, updated_at FROM timestamp_test ORDER BY updated_at DESC NULLS LAST", - ) + .fetch_all("SELECT id, name, updated_at FROM timestamp_test ORDER BY updated_at ASC") .await .unwrap(); - let last_rows: Vec> = rows + let last_rows: Vec>> = rows .iter() .rev() .take(2) - .map(|row| row.try_get(2).ok()) - .collect(); + .map(|row| row.try_get(2)) + .collect::, _>>() + .unwrap(); assert!( last_rows.iter().any(|v| v.is_none()), diff --git a/pgdog/src/backend/pool/connection/buffer.rs b/pgdog/src/backend/pool/connection/buffer.rs index 20bf1a373..dcb074bc9 100644 --- a/pgdog/src/backend/pool/connection/buffer.rs +++ b/pgdog/src/backend/pool/connection/buffer.rs @@ -16,6 +16,8 @@ use crate::{ }, }; +use pgdog_postgres_types::Datum; + use super::Aggregates; /// Sort and aggregate rows received from multiple shards. @@ -80,48 +82,52 @@ impl Buffer { // Sort rows. let order_by = move |a: &DataRow, b: &DataRow| -> Ordering { - for col in cols.iter() { - let index = col.index(); - let asc = col.asc(); - let index = if let Some(index) = index { - index - } else { - continue; - }; - let left = a.get_column(index, decoder); - let right = b.get_column(index, decoder); - - let ordering = match (left, right) { - (Ok(Some(left)), Ok(Some(right))) => { - // Handle the special vector case. - if let OrderBy::AscVectorL2(_, vector) = col { - let left: Option = left.value.try_into().ok(); - let right: Option = right.value.try_into().ok(); - - if let (Some(left), Some(right)) = (left, right) { - let left = left.distance_l2(vector); - let right = right.distance_l2(vector); - - left.partial_cmp(&right) + cols.iter() + .filter_map(|col| { + let index = col.index(); + let asc = col.asc(); + let Some(index) = index else { + return None; + }; + let left = a.get_column(index, decoder); + let right = b.get_column(index, decoder); + + match (left, right) { + (Ok(Some(left)), Ok(Some(right))) => { + // Handle the special vector case. + if let OrderBy::AscVectorL2(_, vector) = col { + let left: Option = left.value.try_into().ok(); + let right: Option = right.value.try_into().ok(); + + if let (Some(left), Some(right)) = (left, right) { + let left = left.distance_l2(vector); + let right = right.distance_l2(vector); + + left.partial_cmp(&right) + } else { + Some(Ordering::Equal) + } } else { - Some(Ordering::Equal) + // FIXME(sage): We don't handle ASC NULLS FIRST or + // DESC NULLS LAST we should either error or add + // support rather than silently do the wrong sorting + match (&left.value, &right.value, asc) { + (Datum::Null, Datum::Null, _) => Some(Ordering::Equal), + (Datum::Null, _, true) => Some(Ordering::Greater), + (_, Datum::Null, true) => Some(Ordering::Less), + (Datum::Null, _, false) => Some(Ordering::Less), + (_, Datum::Null, false) => Some(Ordering::Greater), + (a, b, true) => a.partial_cmp(b), + (a, b, false) => b.partial_cmp(a), + } } - } else if asc { - left.value.partial_cmp(&right.value) - } else { - right.value.partial_cmp(&left.value) } - } - - _ => Some(Ordering::Equal), - }; - if ordering != Some(Ordering::Equal) { - return ordering.unwrap_or(Ordering::Equal); - } - } - - Ordering::Equal + _ => Some(Ordering::Equal), + } + }) + .reduce(Ordering::then) + .unwrap_or(Ordering::Equal) }; self.buffer.make_contiguous().sort_by(order_by); From 264de1fa43c9d270707161ee861a2f11011386cf Mon Sep 17 00:00:00 2001 From: Sean Griffin Date: Tue, 9 Jun 2026 06:01:30 -0600 Subject: [PATCH 3/4] wip --- pgdog/src/backend/pool/connection/aggregate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgdog/src/backend/pool/connection/aggregate.rs b/pgdog/src/backend/pool/connection/aggregate.rs index 1ec90c04b..d1a42e2c2 100644 --- a/pgdog/src/backend/pool/connection/aggregate.rs +++ b/pgdog/src/backend/pool/connection/aggregate.rs @@ -21,7 +21,7 @@ use rust_decimal::Decimal; use super::Error; /// GROUP BY -#[derive(Hash, PartialEq, Eq, PartialOrd, Debug)] +#[derive(Hash, PartialEq, Eq, Debug)] struct Grouping { columns: Vec<(usize, Datum)>, } From 381ca5461e63d36860e16a8de252301bb4247d41 Mon Sep 17 00:00:00 2001 From: Sean Griffin Date: Tue, 9 Jun 2026 06:16:22 -0600 Subject: [PATCH 4/4] Document expectations for trait impls on datum --- pgdog-postgres-types/src/datum.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pgdog-postgres-types/src/datum.rs b/pgdog-postgres-types/src/datum.rs index 9dacec2b2..4fc6a9567 100644 --- a/pgdog-postgres-types/src/datum.rs +++ b/pgdog-postgres-types/src/datum.rs @@ -10,6 +10,9 @@ use crate::{ TimestampTz, ToDataRowColumn, }; +/// Represents a single piece of data in expression position. Trait +/// implementations for Rust operators match the semantics of that +/// operator/opclass in expression position in PG #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Datum { /// BIGINT.