sui_indexer_alt_schema/blooms/
bloom.rs1use crate::blooms::hash::DoubleHasher;
5use crate::blooms::hash::set_bit;
6
7#[derive(Debug, Clone)]
9pub struct BloomFilter<const BYTES: usize, const HASHES: u32, const SEED: u128> {
10 bytes: [u8; BYTES],
11}
12
13impl<const BYTES: usize, const HASHES: u32, const SEED: u128> BloomFilter<BYTES, HASHES, SEED> {
14 pub fn new() -> Self {
16 Self {
17 bytes: [0u8; BYTES],
18 }
19 }
20
21 pub fn insert(&mut self, key: &[u8]) {
22 for bit_idx in Self::hash(key) {
23 set_bit(&mut self.bytes, bit_idx);
24 }
25 }
26
27 pub fn popcount(&self) -> usize {
28 self.bytes.iter().map(|b| b.count_ones() as usize).sum()
29 }
30
31 pub fn fold(self, min_fold_bytes: usize, max_fold_density: f64) -> Vec<u8> {
39 let mut bytes = self.bytes.to_vec();
40
41 while bytes.len().is_multiple_of(2) && bytes.len() >= 2 * min_fold_bytes {
42 let popcount: usize = bytes.iter().map(|b| b.count_ones() as usize).sum();
44 let density = popcount as f64 / (bytes.len() * 8) as f64;
45 if density > max_fold_density {
46 break;
47 }
48
49 let half = bytes.len() / 2;
51 for i in 0..half {
52 bytes[i] |= bytes[half + i];
53 }
54 bytes.truncate(half);
55 }
56
57 bytes
58 }
59
60 pub fn hash(value: &[u8]) -> impl Iterator<Item = usize> {
62 let num_bits = BYTES * 8;
63 DoubleHasher::with_value(value, SEED)
64 .take(HASHES as usize)
65 .map(move |h| (h as usize) % num_bits)
66 }
67}
68
69impl<const BYTES: usize, const HASHES: u32, const SEED: u128> Default
70 for BloomFilter<BYTES, HASHES, SEED>
71{
72 fn default() -> Self {
73 Self::new()
74 }
75}
76
77impl<T: AsRef<[u8]>, const BYTES: usize, const HASHES: u32, const SEED: u128> Extend<T>
78 for BloomFilter<BYTES, HASHES, SEED>
79{
80 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
81 for key in iter {
82 self.insert(key.as_ref());
83 }
84 }
85}
86
87#[cfg(test)]
88mod tests {
89 use crate::blooms::hash;
90 use crate::cp_blooms::MAX_FOLD_DENSITY;
91 use crate::cp_blooms::MIN_FOLD_BYTES;
92
93 use super::*;
94
95 type TestBloomFilter = BloomFilter<1024, 5, 67>;
97 type TestBloomFilter2048 = BloomFilter<2048, 5, 67>;
98
99 impl<const BYTES: usize, const HASHES: u32, const SEED: u128> BloomFilter<BYTES, HASHES, SEED> {
100 pub fn contains(bytes: &[u8], key: &[u8]) -> bool {
102 let num_bits = bytes.len() * 8;
103 Self::hash(key).all(|pos| hash::check_bit(bytes, pos % num_bits))
104 }
105
106 pub fn density(&self) -> f64 {
108 self.popcount() as f64 / (BYTES * 8) as f64
109 }
110 }
111
112 #[test]
113 fn test_insert_and_contains() {
114 let mut bloom = TestBloomFilter::new();
115
116 let key1 = b"hello";
117 let key2 = b"world";
118
119 bloom.insert(key1);
120 bloom.insert(key2);
121
122 assert!(
123 TestBloomFilter::contains(&bloom.bytes, key1),
124 "Should contain inserted key"
125 );
126 assert!(
127 TestBloomFilter::contains(&bloom.bytes, key2),
128 "Should contain inserted key"
129 );
130 }
131
132 #[test]
133 fn test_fold_preserves_membership() {
134 let mut bloom = TestBloomFilter::new();
135
136 let keys: Vec<Vec<u8>> = (0..50).map(|i| format!("key{}", i).into_bytes()).collect();
137 for key in &keys {
138 bloom.insert(key);
139 }
140
141 let folded_bytes = bloom.fold(MIN_FOLD_BYTES, MAX_FOLD_DENSITY);
142
143 for key in &keys {
145 assert!(
146 TestBloomFilter::contains(&folded_bytes, key),
147 "Folded filter must contain all original keys"
148 );
149 }
150 }
151
152 #[test]
153 fn test_fold_minimum_size() {
154 let mut bloom = TestBloomFilter2048::new();
155
156 bloom.insert(b"single");
158
159 let folded_bytes = bloom.fold(MIN_FOLD_BYTES, MAX_FOLD_DENSITY);
160
161 assert!(
163 folded_bytes.len() >= MIN_FOLD_BYTES,
164 "Should not fold below minimum size"
165 );
166 assert_eq!(
167 folded_bytes.len(),
168 MIN_FOLD_BYTES,
169 "Very sparse filter should fold to minimum"
170 );
171 assert!(
172 TestBloomFilter2048::contains(&folded_bytes, b"single"),
173 "Should still contain the key"
174 );
175 }
176
177 #[test]
178 fn test_density() {
179 let mut bloom = TestBloomFilter::new();
180 assert_eq!(bloom.density(), 0.0, "Empty filter should have 0 density");
181
182 bloom.insert(b"key1");
183 let density = bloom.density();
184 assert!(density > 0.0, "Should have some bits set");
185 assert!(density < 0.01, "Few keys should have low density");
186 }
187}