From 6222a69f3cc3ac4e826ff161ee8e89feb9ba0f67 Mon Sep 17 00:00:00 2001
From: Sean Griffin <sean@seantheprogrammer.com>
Date: Mon, 8 Jun 2026 09:04:12 -0600
Subject: [PATCH 1/4] Do not define total ordering for Datum

Datum has had total ordering defined since it was originally introduced
in 000b4cd7b. There's no context given for why, but I'm willing to
assume the answer is "because it could be derived". At one point the
implementation moved from derived to manually defined with identical
semantics (in a commit written by claude that has no reasoning in the
commit message, so I assume wasn't thought out).

The reason this is possible is because PG does define total ordering for
each of its data types. The only data type for which this isn't
obviously the case is floats, and PG defines all NaNs as equal and
greater than non NaNs. Rust then also defines what ordering for
differing enum variants means in its `PartialOrd` derive, which
primarily orders on discriminant.

In both `PartialEq` and `PartialOrd`, we diverge from PG's behavior in
that we do not ever consider cross type comparisons, while PG does have
several opclasses that do not require the lhs and rhs to be the same
type. This is *probably* fine, as I believe the only we could ever end
up comparing `Datum`s of differing types expecting a meaningful answer
are:

- The datum variant of a column changes across rows (impossible)
- The datum variant of a column changes across shards (should be
  impossible)
- We are blindly comparing datums from two columns that may have
  differing types or against a hard coded value and didn't consider this
  (unlikely, but possible)

The last case is a bit of a footgun, but one that I'm comfortable enough
with to not immediately go have us try to perfectly match PG's semantics
with. `1::int4 != 1.0::real` is not necessarily ideal, but is at least a
logical answer and one that is very hard for us to reach in our code.

Leaving `PartialOrd` alone, however, makes me much less comfortable. The
semantics of the derived implementation would mean `1::int4 >
2.0::real`, which is much more obviously wrong. And were that footgun to
ever go off, it's feasible that it wouldn't be caught by a test, unless
the code comparing datums considers every possible way lhs and rhs could
differ in type.

Because of that, I have manually implemented `PartialOrd` to explicitly
return `None` when the types differ, along with any comparison with
`Null` returning `None` to reflect that PG returns `NULL` in that case.
I opted not to have this reflect the behavior of PG's `ORDER BY`, which
is `NULLS FIRST` by default, as that code handles NULLs explicitly, and
I would expect the behavior of `PartialOrd` to match the `<` operator in
SQL.

The `PartialOrd` impl was written in this more verbose way, rather than
with a single `_ if discriminant(self) != discriminant(other)` so that
any variants added to `Datum` in the future will fail to compile if they
are not handled in the impl. (Hilariously, this meant I couldn't write
`(Null, _) | (_, Null)` in the last arm to match the shape of the
others, as the compiler correclty points out that `(_, Null)` is
redundant as we've already handled every other possible type on the left
explicitly)

I have left the `Eq` impl alone since returning `false` is a reasonable
answer for differing types, and the only requirement that Rust defines
for `Eq` is that `a == a` which is the case.
---
 pgdog-postgres-types/src/array.rs             |  2 +-
 pgdog-postgres-types/src/datum.rs             | 45 ++++++++++++++++++-
 .../src/backend/pool/connection/aggregate.rs  |  2 +-
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/pgdog-postgres-types/src/array.rs b/pgdog-postgres-types/src/array.rs
index 402ecb1bd..b7e865d21 100644
--- a/pgdog-postgres-types/src/array.rs
+++ b/pgdog-postgres-types/src/array.rs
@@ -3,7 +3,7 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
 use super::{Error, Format};
 use crate::{DataType, Datum};
 
-#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, PartialOrd, PartialEq, Eq, Hash)]
 pub struct Array {
     elements: Vec<Option<Datum>>,
     element_oid: i32,
diff --git a/pgdog-postgres-types/src/datum.rs b/pgdog-postgres-types/src/datum.rs
index 8de553da7..9dacec2b2 100644
--- a/pgdog-postgres-types/src/datum.rs
+++ b/pgdog-postgres-types/src/datum.rs
@@ -1,3 +1,4 @@
+use std::cmp::{Ordering, PartialOrd};
 use std::ops::Add;
 
 use bytes::Bytes;
@@ -9,7 +10,7 @@ use crate::{
     TimestampTz, ToDataRowColumn,
 };
 
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum Datum {
     /// BIGINT.
     Bigint(i64),
@@ -47,6 +48,48 @@ pub enum Datum {
     Boolean(bool),
 }
 
+impl PartialOrd for Datum {
+    fn partial_cmp(&self, other: &Datum) -> Option<Ordering> {
+        use Datum::*;
+
+        match (self, other) {
+            (Bigint(a), Bigint(b)) => a.partial_cmp(b),
+            (Bigint(_), _) | (_, Bigint(_)) => None,
+            (Integer(a), Integer(b)) => a.partial_cmp(b),
+            (Integer(_), _) | (_, Integer(_)) => None,
+            (SmallInt(a), SmallInt(b)) => a.partial_cmp(b),
+            (SmallInt(_), _) | (_, SmallInt(_)) => None,
+            (Interval(a), Interval(b)) => a.partial_cmp(b),
+            (Interval(_), _) | (_, Interval(_)) => None,
+            (Text(a), Text(b)) => a.partial_cmp(b),
+            (Text(_), _) | (_, Text(_)) => None,
+            (Timestamp(a), Timestamp(b)) => a.partial_cmp(b),
+            (Timestamp(_), _) | (_, Timestamp(_)) => None,
+            (TimestampTz(a), TimestampTz(b)) => a.partial_cmp(b),
+            (TimestampTz(_), _) | (_, TimestampTz(_)) => None,
+            (Uuid(a), Uuid(b)) => a.partial_cmp(b),
+            (Uuid(_), _) | (_, Uuid(_)) => None,
+            (Numeric(a), Numeric(b)) => a.partial_cmp(b),
+            (Numeric(_), _) | (_, Numeric(_)) => None,
+            (Float(a), Float(b)) => a.partial_cmp(b),
+            (Float(_), _) | (_, Float(_)) => None,
+            (Double(a), Double(b)) => a.partial_cmp(b),
+            (Double(_), _) | (_, Double(_)) => None,
+            (Vector(a), Vector(b)) => a.partial_cmp(b),
+            (Vector(_), _) | (_, Vector(_)) => None,
+            (Oid(a), Oid(b)) => a.partial_cmp(b),
+            (Oid(_), _) | (_, Oid(_)) => None,
+            (Array(a), Array(b)) => a.partial_cmp(b),
+            (Array(_), _) | (_, Array(_)) => None,
+            (Unknown(a), Unknown(b)) => a.partial_cmp(b),
+            (Unknown(_), _) | (_, Unknown(_)) => None,
+            (Boolean(a), Boolean(b)) => a.partial_cmp(b),
+            (Boolean(_), _) | (_, Boolean(_)) => None,
+            (Null, _) => None,
+        }
+    }
+}
+
 impl ToDataRowColumn for Datum {
     fn to_data_row_column(&self) -> Data {
         use Datum::*;
diff --git a/pgdog/src/backend/pool/connection/aggregate.rs b/pgdog/src/backend/pool/connection/aggregate.rs
index 1e070104f..1ec90c04b 100644
--- a/pgdog/src/backend/pool/connection/aggregate.rs
+++ b/pgdog/src/backend/pool/connection/aggregate.rs
@@ -21,7 +21,7 @@ use rust_decimal::Decimal;
 use super::Error;
 
 /// GROUP BY <columns>
-#[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
+#[derive(Hash, PartialEq, Eq, PartialOrd, Debug)]
 struct Grouping {
     columns: Vec<(usize, Datum)>,
 }

From 5b61a26e39a3c2852a0af3532e743bad07280164 Mon Sep 17 00:00:00 2001
From: Sean Griffin <sean@seantheprogrammer.com>
Date: Mon, 8 Jun 2026 10:50:23 -0600
Subject: [PATCH 2/4] Handle NULLS in ORDER BY, fix a test that was testing
 nothing

I reversed the order in that test because it seems like it cares about
how we compare NULL with certain timestamps, and not that we support
NULLS FIRST|LAST specifically
---
 .../tests/integration/timestamp_sorting.rs    | 11 ++-
 pgdog/src/backend/pool/connection/buffer.rs   | 82 ++++++++++---------
 2 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/integration/rust/tests/integration/timestamp_sorting.rs b/integration/rust/tests/integration/timestamp_sorting.rs
index 591ea1cb1..446e6a730 100644
--- a/integration/rust/tests/integration/timestamp_sorting.rs
+++ b/integration/rust/tests/integration/timestamp_sorting.rs
@@ -120,18 +120,17 @@ async fn test_timestamp_sorting_across_shards() {
     .unwrap();
 
     let rows = sharded_conn
-        .fetch_all(
-            "SELECT id, name, updated_at FROM timestamp_test ORDER BY updated_at DESC NULLS LAST",
-        )
+        .fetch_all("SELECT id, name, updated_at FROM timestamp_test ORDER BY updated_at ASC")
         .await
         .unwrap();
 
-    let last_rows: Vec<Option<chrono::NaiveDateTime>> = rows
+    let last_rows: Vec<Option<chrono::DateTime<chrono::Utc>>> = rows
         .iter()
         .rev()
         .take(2)
-        .map(|row| row.try_get(2).ok())
-        .collect();
+        .map(|row| row.try_get(2))
+        .collect::<Result<Vec<_>, _>>()
+        .unwrap();
 
     assert!(
         last_rows.iter().any(|v| v.is_none()),
diff --git a/pgdog/src/backend/pool/connection/buffer.rs b/pgdog/src/backend/pool/connection/buffer.rs
index 20bf1a373..dcb074bc9 100644
--- a/pgdog/src/backend/pool/connection/buffer.rs
+++ b/pgdog/src/backend/pool/connection/buffer.rs
@@ -16,6 +16,8 @@ use crate::{
     },
 };
 
+use pgdog_postgres_types::Datum;
+
 use super::Aggregates;
 
 /// Sort and aggregate rows received from multiple shards.
@@ -80,48 +82,52 @@ impl Buffer {
 
         // Sort rows.
         let order_by = move |a: &DataRow, b: &DataRow| -> Ordering {
-            for col in cols.iter() {
-                let index = col.index();
-                let asc = col.asc();
-                let index = if let Some(index) = index {
-                    index
-                } else {
-                    continue;
-                };
-                let left = a.get_column(index, decoder);
-                let right = b.get_column(index, decoder);
-
-                let ordering = match (left, right) {
-                    (Ok(Some(left)), Ok(Some(right))) => {
-                        // Handle the special vector case.
-                        if let OrderBy::AscVectorL2(_, vector) = col {
-                            let left: Option<Vector> = left.value.try_into().ok();
-                            let right: Option<Vector> = right.value.try_into().ok();
-
-                            if let (Some(left), Some(right)) = (left, right) {
-                                let left = left.distance_l2(vector);
-                                let right = right.distance_l2(vector);
-
-                                left.partial_cmp(&right)
+            cols.iter()
+                .filter_map(|col| {
+                    let index = col.index();
+                    let asc = col.asc();
+                    let Some(index) = index else {
+                        return None;
+                    };
+                    let left = a.get_column(index, decoder);
+                    let right = b.get_column(index, decoder);
+
+                    match (left, right) {
+                        (Ok(Some(left)), Ok(Some(right))) => {
+                            // Handle the special vector case.
+                            if let OrderBy::AscVectorL2(_, vector) = col {
+                                let left: Option<Vector> = left.value.try_into().ok();
+                                let right: Option<Vector> = right.value.try_into().ok();
+
+                                if let (Some(left), Some(right)) = (left, right) {
+                                    let left = left.distance_l2(vector);
+                                    let right = right.distance_l2(vector);
+
+                                    left.partial_cmp(&right)
+                                } else {
+                                    Some(Ordering::Equal)
+                                }
                             } else {
-                                Some(Ordering::Equal)
+                                // FIXME(sage): We don't handle ASC NULLS FIRST or
+                                // DESC NULLS LAST we should either error or add
+                                // support rather than silently do the wrong sorting
+                                match (&left.value, &right.value, asc) {
+                                    (Datum::Null, Datum::Null, _) => Some(Ordering::Equal),
+                                    (Datum::Null, _, true) => Some(Ordering::Greater),
+                                    (_, Datum::Null, true) => Some(Ordering::Less),
+                                    (Datum::Null, _, false) => Some(Ordering::Less),
+                                    (_, Datum::Null, false) => Some(Ordering::Greater),
+                                    (a, b, true) => a.partial_cmp(b),
+                                    (a, b, false) => b.partial_cmp(a),
+                                }
                             }
-                        } else if asc {
-                            left.value.partial_cmp(&right.value)
-                        } else {
-                            right.value.partial_cmp(&left.value)
                         }
-                    }
-
-                    _ => Some(Ordering::Equal),
-                };
 
-                if ordering != Some(Ordering::Equal) {
-                    return ordering.unwrap_or(Ordering::Equal);
-                }
-            }
-
-            Ordering::Equal
+                        _ => Some(Ordering::Equal),
+                    }
+                })
+                .reduce(Ordering::then)
+                .unwrap_or(Ordering::Equal)
         };
 
         self.buffer.make_contiguous().sort_by(order_by);

From 264de1fa43c9d270707161ee861a2f11011386cf Mon Sep 17 00:00:00 2001
From: Sean Griffin <sean@seantheprogrammer.com>
Date: Tue, 9 Jun 2026 06:01:30 -0600
Subject: [PATCH 3/4] wip

---
 pgdog/src/backend/pool/connection/aggregate.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pgdog/src/backend/pool/connection/aggregate.rs b/pgdog/src/backend/pool/connection/aggregate.rs
index 1ec90c04b..d1a42e2c2 100644
--- a/pgdog/src/backend/pool/connection/aggregate.rs
+++ b/pgdog/src/backend/pool/connection/aggregate.rs
@@ -21,7 +21,7 @@ use rust_decimal::Decimal;
 use super::Error;
 
 /// GROUP BY <columns>
-#[derive(Hash, PartialEq, Eq, PartialOrd, Debug)]
+#[derive(Hash, PartialEq, Eq, Debug)]
 struct Grouping {
     columns: Vec<(usize, Datum)>,
 }

From 381ca5461e63d36860e16a8de252301bb4247d41 Mon Sep 17 00:00:00 2001
From: Sean Griffin <sean@seantheprogrammer.com>
Date: Tue, 9 Jun 2026 06:16:22 -0600
Subject: [PATCH 4/4] Document expectations for trait impls on datum

---
 pgdog-postgres-types/src/datum.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pgdog-postgres-types/src/datum.rs b/pgdog-postgres-types/src/datum.rs
index 9dacec2b2..4fc6a9567 100644
--- a/pgdog-postgres-types/src/datum.rs
+++ b/pgdog-postgres-types/src/datum.rs
@@ -10,6 +10,9 @@ use crate::{
     TimestampTz, ToDataRowColumn,
 };
 
+/// Represents a single piece of data in expression position. Trait
+/// implementations for Rust operators match the semantics of that
+/// operator/opclass in expression position in PG
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum Datum {
     /// BIGINT.