Skip to content

Commit a99c42e

Browse files
committed
adding save/load functionality
1 parent b6f8966 commit a99c42e

File tree

6 files changed

+281
-9
lines changed

6 files changed

+281
-9
lines changed

.github/workflows/bigendian.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
2+
name: Go-PPC64-CI
3+
4+
on: [push, pull_request]
5+
6+
jobs:
7+
test:
8+
strategy:
9+
matrix:
10+
go-version: [1.24.x]
11+
platform: [ubuntu-latest]
12+
runs-on: ${{ matrix.platform }}
13+
steps:
14+
- name: Install Go
15+
uses: actions/setup-go@v5
16+
with:
17+
go-version: ${{ matrix.go-version }}
18+
- name: Checkout code
19+
uses: actions/checkout@v4
20+
- name: Install
21+
run: |
22+
sudo apt-get update
23+
sudo apt install -y qemu-system-ppc64 qemu-user
24+
- name: Test
25+
run: |
26+
GOARCH=ppc64 go test ./...

README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,16 @@ An xor filter is immutable, it is concurrent. The expectation is that you build
4949

5050
Though the filter itself does not use much memory, the construction of the filter needs many bytes of memory per set entry.
5151

52-
For persistence, you only need to serialize the following data structure:
52+
For persistence, you can use `Save` and `LoadBinaryFuse8`. They use a format that is portable across different systems (little/big endian).
5353

5454
```Go
55-
type BinaryFuse8 struct {
56-
Seed uint64
57-
SegmentLength uint32
58-
SegmentLengthMask uint32
59-
SegmentCount uint32
60-
SegmentCountLength uint32
61-
Fingerprints []uint8
62-
}
55+
errsave := filter.Save(...)
56+
//...
57+
filter, errload := LoadBinaryFuse8(&buf)
6358
```
6459

60+
Note that it is a direct binary save/restore. There is no data integrity check: loading from corrupted sources might result in runtime errors. We recommend that you use hash codes for integrity checks.
61+
6562
When constructing the filter, you should ensure that there are not too many duplicate keys for best results.
6663

6764
## Generic (8-bit, 16-bit, 32-bit)
@@ -75,6 +72,9 @@ filter8, _ := xorfilter.NewBinaryFuse[uint8](keys) // 0.39% false positive rate,
7572
filter16, _ := xorfilter.NewBinaryFuse[uint16](keys) // 0.0015% false positive rate, uses about 18 bits per key
7673
filter32, _ := xorfilter.NewBinaryFuse[uint32](keys) // 2e-08% false positive rate, uses about 36 bits per key
7774
```
75+
76+
You can similarly save or load the data with `Save` and `LoadBinaryFuse[uint16](...)`.
77+
7878
The 32-bit fingerprints are provided but not recommended. Most users will want to use either the 8-bit or 16-bit fingerprints.
7979

8080
The Binary Fuse filters have memory usages of about 9 bits per key in the 8-bit case, 18 bits per key in the 16-bit case,

binaryfusefilter8.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package xorfilter
22

3+
import "io"
4+
35
// BinaryFuse8 is a named type whose underlying type is BinaryFuse[uint8],
// i.e. the generic binary fuse filter instantiated with 8-bit fingerprints.
// Its methods in this file convert the receiver to *BinaryFuse[uint8] and
// delegate to the generic implementation.
type BinaryFuse8 BinaryFuse[uint8]
46

57
// PopulateBinaryFuse8 fills the filter with provided keys. For best results,
@@ -18,3 +20,17 @@ func PopulateBinaryFuse8(keys []uint64) (*BinaryFuse8, error) {
1820
func (filter *BinaryFuse8) Contains(key uint64) bool {
1921
return (*BinaryFuse[uint8])(filter).Contains(key)
2022
}
23+
24+
// Save writes the filter to the writer in little endian format.
25+
func (f *BinaryFuse8) Save(w io.Writer) error {
26+
return (*BinaryFuse[uint8])(f).Save(w)
27+
}
28+
29+
// LoadBinaryFuse8 reads the filter from the reader in little endian format.
30+
func LoadBinaryFuse8(r io.Reader) (*BinaryFuse8, error) {
31+
filter, err := LoadBinaryFuse[uint8](r)
32+
if err != nil {
33+
return nil, err
34+
}
35+
return (*BinaryFuse8)(filter), nil
36+
}

serialization.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
//go:build (!amd64 && !386 && !arm && !arm64 && !ppc64le && !mipsle && !mips64le && !mips64p32le && !wasm) || appengine
2+
// +build !amd64,!386,!arm,!arm64,!ppc64le,!mipsle,!mips64le,!mips64p32le,!wasm appengine
3+
4+
package xorfilter
5+
6+
import (
7+
"encoding/binary"
8+
"io"
9+
)
10+
11+
// Save writes the filter to the writer in little endian format.
12+
func (f *BinaryFuse[T]) Save(w io.Writer) error {
13+
if err := binary.Write(w, binary.LittleEndian, f.Seed); err != nil {
14+
return err
15+
}
16+
if err := binary.Write(w, binary.LittleEndian, f.SegmentLength); err != nil {
17+
return err
18+
}
19+
if err := binary.Write(w, binary.LittleEndian, f.SegmentLengthMask); err != nil {
20+
return err
21+
}
22+
if err := binary.Write(w, binary.LittleEndian, f.SegmentCount); err != nil {
23+
return err
24+
}
25+
if err := binary.Write(w, binary.LittleEndian, f.SegmentCountLength); err != nil {
26+
return err
27+
}
28+
// Write the length of Fingerprints
29+
fpLen := uint32(len(f.Fingerprints))
30+
if err := binary.Write(w, binary.LittleEndian, fpLen); err != nil {
31+
return err
32+
}
33+
// Write the Fingerprints
34+
for _, fp := range f.Fingerprints {
35+
if err := binary.Write(w, binary.LittleEndian, fp); err != nil {
36+
return err
37+
}
38+
}
39+
return nil
40+
}
41+
42+
// LoadBinaryFuse reads the filter from the reader in little endian format.
43+
func LoadBinaryFuse[T Unsigned](r io.Reader) (*BinaryFuse[T], error) {
44+
var f BinaryFuse[T]
45+
if err := binary.Read(r, binary.LittleEndian, &f.Seed); err != nil {
46+
return nil, err
47+
}
48+
if err := binary.Read(r, binary.LittleEndian, &f.SegmentLength); err != nil {
49+
return nil, err
50+
}
51+
if err := binary.Read(r, binary.LittleEndian, &f.SegmentLengthMask); err != nil {
52+
return nil, err
53+
}
54+
if err := binary.Read(r, binary.LittleEndian, &f.SegmentCount); err != nil {
55+
return nil, err
56+
}
57+
if err := binary.Read(r, binary.LittleEndian, &f.SegmentCountLength); err != nil {
58+
return nil, err
59+
}
60+
// Read the length of Fingerprints
61+
var fpLen uint32
62+
if err := binary.Read(r, binary.LittleEndian, &fpLen); err != nil {
63+
return nil, err
64+
}
65+
f.Fingerprints = make([]T, fpLen)
66+
for i := range f.Fingerprints {
67+
if err := binary.Read(r, binary.LittleEndian, &f.Fingerprints[i]); err != nil {
68+
return nil, err
69+
}
70+
}
71+
return &f, nil
72+
}

serialization_le.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//go:build amd64 || 386 || arm || arm64 || ppc64le || mipsle || mips64le || mips64p32le || wasm
2+
3+
package xorfilter
4+
5+
import (
6+
"io"
7+
"unsafe"
8+
)
9+
10+
// Save writes the filter to the writer assuming little endian system, using direct byte copy for performance.
11+
func (f *BinaryFuse[T]) Save(w io.Writer) error {
12+
// Write Seed
13+
if _, err := w.Write((*[8]byte)(unsafe.Pointer(&f.Seed))[:]); err != nil {
14+
return err
15+
}
16+
// Write SegmentLength
17+
if _, err := w.Write((*[4]byte)(unsafe.Pointer(&f.SegmentLength))[:]); err != nil {
18+
return err
19+
}
20+
// Write SegmentLengthMask
21+
if _, err := w.Write((*[4]byte)(unsafe.Pointer(&f.SegmentLengthMask))[:]); err != nil {
22+
return err
23+
}
24+
// Write SegmentCount
25+
if _, err := w.Write((*[4]byte)(unsafe.Pointer(&f.SegmentCount))[:]); err != nil {
26+
return err
27+
}
28+
// Write SegmentCountLength
29+
if _, err := w.Write((*[4]byte)(unsafe.Pointer(&f.SegmentCountLength))[:]); err != nil {
30+
return err
31+
}
32+
// Write length of Fingerprints
33+
fpLen := uint32(len(f.Fingerprints))
34+
if _, err := w.Write((*[4]byte)(unsafe.Pointer(&fpLen))[:]); err != nil {
35+
return err
36+
}
37+
// Write Fingerprints
38+
if len(f.Fingerprints) > 0 {
39+
size := int(unsafe.Sizeof(T(0)))
40+
bytes := unsafe.Slice((*byte)(unsafe.Pointer(&f.Fingerprints[0])), len(f.Fingerprints)*size)
41+
if _, err := w.Write(bytes); err != nil {
42+
return err
43+
}
44+
}
45+
return nil
46+
}
47+
48+
// LoadBinaryFuse reads the filter from the reader assuming little endian system, using direct byte copy for performance.
49+
func LoadBinaryFuse[T Unsigned](r io.Reader) (*BinaryFuse[T], error) {
50+
var f BinaryFuse[T]
51+
// Read Seed
52+
if _, err := io.ReadFull(r, (*[8]byte)(unsafe.Pointer(&f.Seed))[:]); err != nil {
53+
return nil, err
54+
}
55+
// Read SegmentLength
56+
if _, err := io.ReadFull(r, (*[4]byte)(unsafe.Pointer(&f.SegmentLength))[:]); err != nil {
57+
return nil, err
58+
}
59+
// Read SegmentLengthMask
60+
if _, err := io.ReadFull(r, (*[4]byte)(unsafe.Pointer(&f.SegmentLengthMask))[:]); err != nil {
61+
return nil, err
62+
}
63+
// Read SegmentCount
64+
if _, err := io.ReadFull(r, (*[4]byte)(unsafe.Pointer(&f.SegmentCount))[:]); err != nil {
65+
return nil, err
66+
}
67+
// Read SegmentCountLength
68+
if _, err := io.ReadFull(r, (*[4]byte)(unsafe.Pointer(&f.SegmentCountLength))[:]); err != nil {
69+
return nil, err
70+
}
71+
// Read length of Fingerprints
72+
var fpLen uint32
73+
if _, err := io.ReadFull(r, (*[4]byte)(unsafe.Pointer(&fpLen))[:]); err != nil {
74+
return nil, err
75+
}
76+
f.Fingerprints = make([]T, fpLen)
77+
if fpLen > 0 {
78+
size := int(unsafe.Sizeof(T(0)))
79+
bytes := unsafe.Slice((*byte)(unsafe.Pointer(&f.Fingerprints[0])), int(fpLen)*size)
80+
if _, err := io.ReadFull(r, bytes); err != nil {
81+
return nil, err
82+
}
83+
}
84+
return &f, nil
85+
}

serialization_test.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package xorfilter
2+
3+
import (
4+
"bytes"
5+
"encoding/base64"
6+
"reflect"
7+
"testing"
8+
)
9+
10+
func TestBinaryFuse8Serialization(t *testing.T) {
11+
keys := []uint64{1, 2, 3, 4, 5, 100, 200, 300}
12+
filter, err := PopulateBinaryFuse8(keys)
13+
if err != nil {
14+
t.Fatal(err)
15+
}
16+
17+
// Test generic serialization
18+
var buf bytes.Buffer
19+
err = filter.Save(&buf)
20+
if err != nil {
21+
t.Fatal(err)
22+
}
23+
24+
loadedFilter, err := LoadBinaryFuse8(&buf)
25+
if err != nil {
26+
t.Fatal(err)
27+
}
28+
29+
if !reflect.DeepEqual(filter, loadedFilter) {
30+
t.Error("Generic serialization: Filters do not match after save/load")
31+
}
32+
33+
for _, key := range keys {
34+
if !loadedFilter.Contains(key) {
35+
t.Errorf("Generic serialization: Key %d not found in loaded filter", key)
36+
}
37+
}
38+
}
39+
40+
func TestBinaryFuseSerializationGeneric(t *testing.T) {
41+
keys := []uint64{1, 2, 3, 4, 5, 100, 200, 300}
42+
filter, err := NewBinaryFuse[uint16](keys)
43+
if err != nil {
44+
t.Fatal(err)
45+
}
46+
47+
// Test generic serialization
48+
var buf bytes.Buffer
49+
err = filter.Save(&buf)
50+
if err != nil {
51+
t.Fatal(err)
52+
}
53+
54+
if "wVwCiewtCpEIAAAABwAAAAEAAAAIAAAAGAAAAAAAAABY7/rBAAAAAAoqAAA2kPb5AAAAAAAAAAAAAAAAuLkw2QAAAAAAAH1sAAAAAA==" != base64.StdEncoding.EncodeToString(buf.Bytes()) {
55+
t.Log("Base64 serialized data:", base64.StdEncoding.EncodeToString(buf.Bytes()))
56+
t.Error("Generic serialization: Unexpected serialized data")
57+
}
58+
59+
loadedFilter, err := LoadBinaryFuse[uint16](&buf)
60+
if err != nil {
61+
t.Fatal(err)
62+
}
63+
64+
if !reflect.DeepEqual(filter, loadedFilter) {
65+
t.Error("Generic serialization: Filters do not match after save/load")
66+
}
67+
68+
for _, key := range keys {
69+
if !loadedFilter.Contains(key) {
70+
t.Errorf("Generic serialization: Key %d not found in loaded filter", key)
71+
}
72+
}
73+
}

0 commit comments

Comments
 (0)