Skip to content

Commit 15351fe

Browse files
committed
Byte buffer extraction.
1 parent 89ceaf1 commit 15351fe

File tree

1 file changed

+19
-16
lines changed

1 file changed

+19
-16
lines changed

arrow-row/src/radix.rs

Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,8 @@ pub fn radix_sort_to_indices(rows: &Rows) -> Vec<u32> {
9393
let n = rows.num_rows();
9494
let mut indices: Vec<u32> = (0..n as u32).collect();
9595
let mut temp = vec![0u32; n];
96-
msd_radix_sort(&mut indices, &mut temp, rows, 0, true);
96+
let mut bytes = vec![0u8; n];
97+
msd_radix_sort(&mut indices, &mut temp, &mut bytes, rows, 0, true);
9798
indices
9899
}
99100

@@ -121,6 +122,7 @@ unsafe fn row_byte(rows: &Rows, idx: u32, byte_pos: usize) -> u8 {
121122
fn msd_radix_sort(
122123
src: &mut [u32],
123124
dst: &mut [u32],
125+
bytes: &mut [u8],
124126
rows: &Rows,
125127
byte_pos: usize,
126128
result_in_src: bool,
@@ -155,15 +157,16 @@ fn msd_radix_sort(
155157
return;
156158
}
157159

158-
// Both the histogram and scatter loops read each row's byte via
159-
// row_unchecked. Pre-extracting bytes into a contiguous buffer was
160-
// tried but benchmarked slower — the extra write pass costs more
161-
// than the second read through row offsets already hot in cache.
160+
// Extract bytes and build histogram in one pass. The bytes buffer
161+
// is reused across levels so the scatter loop can read from a flat
162+
// array instead of chasing pointers through Rows a second time.
163+
let bytes = &mut bytes[..n];
162164
let mut counts = [0u32; 256];
163-
for &idx in &*src {
164-
// SAFETY: indices contains a permutation of 0..rows.num_rows()
165-
let byte = unsafe { row_byte(rows, idx, byte_pos) };
166-
counts[byte as usize] += 1;
165+
for (i, &idx) in src.iter().enumerate() {
166+
// SAFETY: src contains valid row indices
167+
let b = unsafe { row_byte(rows, idx, byte_pos) };
168+
bytes[i] = b;
169+
counts[b as usize] += 1;
167170
}
168171

169172
let mut offsets = [0u32; 257];
@@ -175,17 +178,16 @@ fn msd_radix_sort(
175178

176179
// No scatter happened — data is still in src, roles unchanged.
177180
if num_buckets == 1 {
178-
msd_radix_sort(src, dst, rows, byte_pos + 1, result_in_src);
181+
msd_radix_sort(src, dst, bytes, rows, byte_pos + 1, result_in_src);
179182
return;
180183
}
181184

182-
// Scatter src → dst
185+
// Scatter src → dst using the pre-extracted bytes
183186
let mut write_pos = offsets;
184-
for &idx in &*src {
185-
// SAFETY: indices contains a permutation of 0..rows.num_rows()
186-
let byte = unsafe { row_byte(rows, idx, byte_pos) } as usize;
187-
dst[write_pos[byte] as usize] = idx;
188-
write_pos[byte] += 1;
187+
for (i, &idx) in src.iter().enumerate() {
188+
let b = bytes[i] as usize;
189+
dst[write_pos[b] as usize] = idx;
190+
write_pos[b] += 1;
189191
}
190192

191193
// Recurse with roles swapped: after scatter the data lives in dst,
@@ -199,6 +201,7 @@ fn msd_radix_sort(
199201
msd_radix_sort(
200202
&mut dst[start..end],
201203
&mut src[start..end],
204+
&mut bytes[start..end],
202205
rows,
203206
byte_pos + 1,
204207
!result_in_src,

0 commit comments

Comments
 (0)