/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use crate::{ clubcard::ClubcardIndex, Clubcard, ClubcardIndexEntry, Equation, Filterable, Queryable, }; use rand::{thread_rng, Rng}; use std::collections::BTreeMap; use std::fmt; /// Marker type for checking that, for example, only Exact ribbons are passed to functions such as /// Clubcard::collect_exact_ribbons. pub struct Exact; /// A Ribbon Filter that encodes a one bit value for every element of the associated universe. pub type ExactRibbon = Ribbon; /// Marker type for checking that, for example, only Approximate ribbons are passed to functions such as /// Clubcard::collect_approximate_ribbons. pub struct Approximate; /// A Ribbon Filter that identifies a subset of a universe with a false positive rate of /// roughly |subset| / |universe|. pub type ApproximateRibbon = Ribbon; /// A RibbonBuilder collects a set of items for insertion into a Ribbon. If the optional filter is /// provided, then only items that are contained in the filter will be inserted. pub struct RibbonBuilder<'a, const W: usize, T: Filterable> { /// block id. id: Vec, /// items to be inserted. items: Vec, /// filter for pruning insertions. filter: Option<&'a PartitionedRibbonFilter>, /// size of the universe that contains self.items universe_size: usize, /// Whether queries against this ribbon indicate membership in R (inverted = false) or /// membership in U \ R (inverted = true). inverted: bool, } impl<'a, const W: usize, T: Filterable> RibbonBuilder<'a, W, T> { fn new( id: &[u8], filter: Option<&'a PartitionedRibbonFilter>, ) -> RibbonBuilder<'a, W, T> { RibbonBuilder { id: AsRef::<[u8]>::as_ref(id).to_vec(), items: vec![], filter, universe_size: 0, inverted: false, } } /// Queue `item` for insertion into the ribbon (if it is contained in the provided filter). pub fn insert(&mut self, item: T) { if let Some(filter) = self.filter { if filter.contains(&item) { self.items.push(item); } } else { self.items.push(item); } } /// Set the size of the universe. This only needs to be called if you /// are constructing an ApproximateRibbon. pub fn set_universe_size(&mut self, universe_size: usize) { self.universe_size = universe_size; } } impl<'a, const W: usize, T: Filterable> From> for ApproximateRibbon { /// Denote the inserted set by R and the universe by U. /// The ribbon returned by ApproximateRibbon::from encodes a function f : U -> {0, 1} where /// f(x) = 0 if and only if x is in R union S where S is a (random) subset of U \ R of size /// ~|R|. In other words, the ribbon solves the approximate membership query problem with a /// false positive rate roughly 2^-r = |R| / (|U| - |R|). /// The size of this ribbon is proportional to r|R|. fn from(mut builder: RibbonBuilder<'a, W, T>) -> ApproximateRibbon { assert!(builder.items.len() <= builder.universe_size); if builder.items.len() == builder.universe_size { ApproximateRibbon::new(&builder.id, 0, builder.universe_size, !builder.inverted) } else { let mut out = ApproximateRibbon::new( &builder.id, builder.items.len(), builder.universe_size, builder.inverted, ); for item in builder.items.drain(..) { out.insert(item); } // Insertions should not fail for a homogeneous system. assert!(out.exceptions.is_empty()); out } } } impl<'a, const W: usize, T: Filterable> From> for ExactRibbon { /// Denote the inserted set by R and the universe by U. /// The ribbon returned by ExactRibbon::from encodes the function "f(x) = 0 iff x in R". The /// size of this ribbon is proportional to |U|. In the typical use case, the set U is the /// result of filtering a larger universe with a false positive rate of 2^-r. This allows for /// exact encoding of R-membership using a pair of filters of total size ~(r+2)|R|. fn from(mut builder: RibbonBuilder<'a, W, T>) -> ExactRibbon { assert!(builder.universe_size == 0 || builder.universe_size == builder.items.len()); if let Some(filter) = builder.filter { if filter.block_is_empty(&builder.id) { // The approximate filter is empty, so it gives a definitive result on every // item and there's nothing to encode in the exact filter. return ExactRibbon::new(&builder.id, 0, filter.block_is_inverted(&builder.id)); } } let mut out = ExactRibbon::new(&builder.id, builder.items.len(), builder.inverted); // By inserting the included items first, we ensure that any exceptions that occur during // insertion are for excluded items. let mut excluded = vec![]; for item in builder.items.drain(..) { if item.included() { out.insert(item); } else { excluded.push(item); } } for item in excluded.drain(..) { out.insert(item); } out } } /// A compact representation of a linear system AX = B pub struct Ribbon, ApproxOrExact> { /// A block identifier. Used to build an index for partitioned filters. id: Vec, /// The overhead. epsilon: f64, /// Equal to (1+epsilon) * |R| m: usize, /// The rank is round(-log2(subset_size / (universe_size - subset_size))) rank: usize, /// A linear system in which each equation has s in {0, ..., m-1} rows: Vec>, /// A (typically short) list of items that failed insertion exceptions: Vec, /// Whether queries against this ribbon indicate membership in R (inverted = false) or /// membership in U \ R (inverted = true). inverted: bool, /// Marker for whether this is an Approximate or an Exact filter. phantom: std::marker::PhantomData, } impl, ApproxOrExact> fmt::Display for Ribbon { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "ribbon({:?}): m: {}, rows: {}, rank: {}, exceptions: {}, epsilon: {}, overhead {}", self.id, self.m, self.rows.len(), self.rank, self.exceptions.len(), self.epsilon, (self.rows.iter().filter(|eq| eq.is_zero()).count() as f64 / (self.rows.len() as f64)) ) } } impl> ApproximateRibbon { /// Construct an empty ribbon to encode a set R of size `subset_size` in a universe U of size /// `universe_size`. fn new(id: &[u8], subset_size: usize, universe_size: usize, inverted: bool) -> Self { assert!(subset_size <= universe_size); // TODO: Tune epsilon as a function of the inputs. Numerical experiments? let epsilon = 0.02; let m = ((1.0 + epsilon) * (subset_size as f64)).floor() as usize; let rank = if subset_size == 0 || 2 * subset_size >= universe_size { 0 } else { (((universe_size - subset_size) as f64) / (subset_size as f64)) .log2() .floor() as usize }; Ribbon { id: AsRef::<[u8]>::as_ref(id).to_vec(), rows: vec![Equation::zero(); m], m, epsilon, rank, exceptions: vec![], inverted, phantom: std::marker::PhantomData, } } } impl> ExactRibbon { /// Construct an empty ribbon to encode a set R of size `subset_size` in a universe U of size /// `universe_size`. fn new(id: &impl AsRef<[u8]>, size: usize, inverted: bool) -> Self { // TODO: Tune epsilon as a function of the inputs. Numerical experiments? let epsilon = 0.02; let m = ((1.0 + epsilon) * (size as f64)).floor() as usize; Ribbon { id: AsRef::<[u8]>::as_ref(id).to_vec(), rows: vec![Equation::zero(); m], m, epsilon, rank: 1, exceptions: vec![], inverted, phantom: std::marker::PhantomData, } } } impl, ApproxOrExact> Ribbon { /// Hash the item to an Equation and insert it into the system. fn insert(&mut self, item: T) -> bool { let mut eq = item.as_query(self.m); eq.b = if item.included() { 0 } else { 1 }; assert!(eq.is_zero() || eq.a[0] & 1 == 1); let rv = self.insert_equation(eq); if !rv { self.exceptions.push(item) } rv } /// Insert an equation into the system using Algorithm 1 from fn insert_equation(&mut self, mut eq: Equation) -> bool { loop { if eq.is_zero() { return eq.b == 0; /* redundant (b=0) or inconsistent (b!=0) */ } if eq.s >= self.rows.len() { // TODO: could be smarter here self.rows.resize_with(eq.s + 1, Equation::zero); } let cur = &mut self.rows[eq.s]; if cur.is_zero() { *cur = eq; return true; /* inserted */ } eq.add(cur); } } /// Solve the system using back-substitution. If this is a block in a larger system, the `tail` /// argument should be set to the the solution vector for the block to the right of this one. fn solve(&self, tail: &[u64]) -> Vec { let mut z = vec![0u64; ((self.rows.len() + 63) / 64) + tail.len()]; // insert tail into z starting at bit self.rows.len() let k = self.rows.len() / 64; let p = self.rows.len() % 64; if p == 0 { z[k..(tail.len() + k)].copy_from_slice(tail); } else { for i in 0..tail.len() { z[k + i] |= tail[i] << p; z[k + i + 1] = tail[i] >> (64 - p) } } // Solve by back substitution for i in (0..self.rows.len()).rev() { let limb = i / 64; let pos = i % 64; let z_i = if self.rows[i].is_zero() { // Row i has a zero in column i, so we're free to choose. // TODO: We want multiple calls to solve() to give a different // solutions (when the system is suitably under-determined), // but it might be nice if this was deterministic. thread_rng().gen::() } else { // Let z' be the vector we get by setting bit i of z to z'_i. // Since z_i is zero, and row i has a one in column i, we have // row_i(z') = z'_i ^ row_i(z). // We want row_i(z') = b, so we must choose // z'_i = row_i(z) ^ b. self.rows[i].eval(&z) ^ self.rows[i].b }; z[limb] |= ((z_i & 1) as u64) << pos; } z } } #[derive(Debug)] struct PartitionedRibbonFilterIndexEntry { offset: usize, m: usize, rank: usize, exceptions: Vec>, inverted: bool, } type PartitionedRibbonFilterIndex = BTreeMap, PartitionedRibbonFilterIndexEntry>; /// A solution to a ribbon system, along with metadata necessary for querying it. struct PartitionedRibbonFilter, ApproxOrExact> { index: PartitionedRibbonFilterIndex, solution: Vec>, phantom: std::marker::PhantomData, phantom2: std::marker::PhantomData, } impl, ApproxOrExact> fmt::Display for PartitionedRibbonFilter { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "PartitionedRibbonFilter({:?})", self.index) } } impl, Approximate> PartitionedRibbonFilter { fn block_is_empty(&self, block: &[u8]) -> bool { let Some(entry) = self.index.get(block) else { return false; }; entry.m == 0 } fn block_is_inverted(&self, block: &[u8]) -> bool { let Some(entry) = self.index.get(block) else { return false; }; entry.inverted } /// Check if this filter contains the given item in the given block. fn contains(&self, item: &T) -> bool { let Some(entry) = self.index.get(item.block()) else { return false; }; let result = (|| { // Empty blocks do not contain anything, // despite having inner product 0 with everything. if entry.m == 0 { return false; } let mut eq = item.as_query(entry.m); eq.s += entry.offset; for i in 0..entry.rank { if eq.eval(&self.solution[i]) != 0 { return false; } } for exception in &entry.exceptions { if exception == item.discriminant() { return false; } } true })(); result ^ entry.inverted } } impl, ApproxOrExact> From>> for PartitionedRibbonFilter { fn from( mut blocks: Vec>, ) -> PartitionedRibbonFilter { // Sort ribbons by descending rank (descending simplifies indexing). blocks.sort_unstable_by(|a, b| b.rank.cmp(&a.rank)); // Solve the (block) system. // The blocks are sorted by descending rank. We need at least one solution (i.e. column // vector) per block, but we need no more than i solutions for a block of rank i. // Concretely, suppose the ranks are [4, 2, 1, 0]. Then our solution can look like // block 0: | | | | // block 1: | | 0 0 // block 2: | 0 0 0 // block 3: | 0 0 0 // Since we serialize the block identifiers, offsets, and ranks in the final filter, we // don't need to encode the zeros. let mut solution = vec![]; let max_rank = blocks.first().map_or(0, |first| first.rank); for i in 0..max_rank { // Back substitution across blocks. let mut tail = vec![]; if max_rank > 1 { // randomizing the tail increases the odds that the solutions will be distinct tail.push(thread_rng().gen::()); } for j in (0..blocks.len()).rev() { if blocks[j].rank > i { tail = blocks[j].solve(&tail); } } while let Some(0) = tail.last() { tail.pop(); } solution.push(tail); } // construct the index---a map from a block identifier to that // block's offset in the solution vector. let mut index = PartitionedRibbonFilterIndex::new(); let mut offset = 0; for block in &blocks { let exceptions = block .exceptions .iter() .map(|x| x.discriminant().to_vec()) .collect(); index.insert( block.id.clone(), PartitionedRibbonFilterIndexEntry { offset, m: block.m, rank: block.rank, exceptions, inverted: block.inverted, }, ); offset += block.rows.len(); } PartitionedRibbonFilter { index, solution, phantom: std::marker::PhantomData, phantom2: std::marker::PhantomData, } } } /// A pair of ribbon filters that, together, solve the exact membership query problem. pub struct ClubcardBuilder> { /// An approximate membership query filter to whittle down the universe /// to a managable size. approx_filter: Option>, /// An exact membership query filter to confirm membership in R for items that /// pass through the approximate filter. exact_filter: Option>, } impl> Default for ClubcardBuilder { fn default() -> Self { ClubcardBuilder { approx_filter: None, exact_filter: None, } } } impl> ClubcardBuilder { pub fn new() -> Self { ClubcardBuilder::default() } pub fn new_approx_builder(&self, block: &[u8]) -> RibbonBuilder<'static, W, T> { assert!(self.approx_filter.is_none()); RibbonBuilder::new(block, None) } pub fn new_exact_builder<'a>(&'a self, block: &[u8]) -> RibbonBuilder<'a, W, T> { RibbonBuilder::new(block, self.approx_filter.as_ref()) } pub fn collect_approx_ribbons(&mut self, ribbons: Vec>) { self.approx_filter = Some(PartitionedRibbonFilter::from(ribbons)); } pub fn collect_exact_ribbons(&mut self, ribbons: Vec>) { self.exact_filter = Some(PartitionedRibbonFilter::from(ribbons)); } pub fn build>( self, universe: U::UniverseMetadata, partition: U::PartitionMetadata, ) -> Clubcard { let mut index: ClubcardIndex = BTreeMap::new(); assert!(self.approx_filter.is_some()); let approx_filter = self.approx_filter.unwrap(); for (block, entry) in approx_filter.index { let meta = ClubcardIndexEntry { approx_filter_offset: entry.offset, approx_filter_m: entry.m, approx_filter_rank: entry.rank, exact_filter_offset: 0, exact_filter_m: 0, inverted: entry.inverted, exceptions: entry.exceptions, }; index.insert(block, meta); } assert!(self.exact_filter.is_some()); let mut exact_filter = self.exact_filter.unwrap(); for (block, entry) in exact_filter.index { assert!(entry.rank == 1); let meta = index.get_mut(&block).unwrap(); meta.exact_filter_offset = entry.offset; meta.exact_filter_m = entry.m; assert!(meta.inverted == entry.inverted); meta.exceptions.extend(entry.exceptions); } assert!(exact_filter.solution.len() == 1); let exact_filter = exact_filter.solution.pop().unwrap(); Clubcard { universe, partition, index, approx_filter: approx_filter.solution, exact_filter, } } } #[cfg(test)] mod tests { use crate::builder::*; use crate::*; use rand::distributions::{Distribution, Uniform}; use rand::Rng; // Construct the equation a(x) = x_i fn std_eq(i: usize) -> Equation { let mut a = [0u64; W]; a[0] = 1; Equation::homogeneous(i, a) } // Construct an random aligned equation using the given distribution for s. fn rand(s_dist: &impl Distribution) -> Equation { let mut rng = rand::thread_rng(); let s = s_dist.sample(&mut rng); let mut a = [0u64; W]; for a_i in a.iter_mut() { *a_i = rng.gen(); } a[0] |= 1; Equation::inhomogeneous(s, a, rng.gen::() & 1) } impl AsQuery for Equation { fn as_query(&self, _m: usize) -> Equation { self.clone() } fn block(&self) -> &[u8] { &[] } fn discriminant(&self) -> &[u8] { unsafe { std::mem::transmute(&self.a[..]) } } } impl Filterable for Equation { fn included(&self) -> bool { self.b == 0 } } impl Queryable for Equation { type UniverseMetadata = (); type PartitionMetadata = (); fn in_universe(&self, _meta: &Self::UniverseMetadata) -> bool { true } } #[test] fn test_solve_identity() { let n = 1024; let mut builder = RibbonBuilder::new(&[], None); for i in 0usize..n { let eq: Equation<1> = std_eq(i); builder.insert(eq); } let ribbon = ExactRibbon::from(builder); let filter = PartitionedRibbonFilter::from(vec![ribbon]); for i in 0usize..n { let eq: Equation<1> = std_eq(i); assert!(eq.eval(&filter.solution[0]) == 0); } } #[test] fn test_solve_empty() { let builder = RibbonBuilder::<4, Equation<4>>::new(&[0], None); let ribbon = ApproximateRibbon::from(builder); let filter = PartitionedRibbonFilter::from(vec![ribbon]); assert!(!filter.contains(&std_eq(0))); } #[test] fn test_solve_random() { let n = 1024; const W: usize = 2; let mut r = Ribbon::, Exact>::new(&[0], n, false); let mut s_dist = Uniform::new(0, r.m); let mut eqs = Vec::with_capacity(n); for _ in 0..n { let eq = rand(&mut s_dist); eqs.push(eq.clone()); r.insert(eq); } let x = r.solve(&[]); for eq in &eqs { assert!(eq.eval(&x) == eq.b); } } #[test] fn test_total_approx_filter() { // test that approximate filters that encode R=U are encoded // as a zero-length solution vector with m=0 and inverted=true // in the metadata. let n = 1024; let mut approx_builder = RibbonBuilder::new(&[], None); approx_builder.set_universe_size(n); for i in 0usize..n { let eq: Equation<1> = std_eq(i); approx_builder.insert(eq); } let approx_ribbon = ApproximateRibbon::from(approx_builder); let approx_filter = PartitionedRibbonFilter::from(vec![approx_ribbon]); let approx_index_entry = approx_filter .index .get(&vec![]) .expect("should have metadata"); assert!(approx_index_entry.m == 0); assert!(approx_index_entry.rank == 0); assert!(approx_index_entry.exceptions.is_empty()); assert!(approx_index_entry.inverted); for i in 0usize..n { let eq = std_eq(i); assert!(approx_filter.contains(&eq)); } assert!(approx_filter.solution.len() == 0); let mut exact_builder = RibbonBuilder::new(&[], Some(&approx_filter)); for i in 0usize..n { let mut eq = std_eq(i); eq.b = 0; exact_builder.insert(eq); } let exact_ribbon = ExactRibbon::from(exact_builder); let exact_filter = PartitionedRibbonFilter::from(vec![exact_ribbon]); let exact_index_entry = exact_filter .index .get(&vec![]) .expect("should have metadata"); assert!(exact_index_entry.m == 0); assert!(exact_index_entry.rank == 1); assert!(exact_index_entry.exceptions.is_empty()); assert!(exact_index_entry.inverted); for i in 0usize..n { let eq = std_eq(i); assert!(exact_filter.contains(&eq)); } assert!(exact_filter.solution.len() == 1); assert!(exact_filter.solution[0].len() == 0); } #[test] fn test_rank_0_approx_filter() { let n = 1024; let mut builder = RibbonBuilder::new(&[], None); builder.set_universe_size(n); for i in 0usize..768 { let eq: Equation<1> = std_eq(i); builder.insert(eq); } let ribbon = ApproximateRibbon::from(builder); let filter = PartitionedRibbonFilter::from(vec![ribbon]); let entry = filter.index.get(&vec![]).expect("should have metadata"); assert!(entry.rank == 0); assert!(!entry.inverted); assert!(filter.solution.len() == 0); for i in 0usize..n { let eq = std_eq(i); assert!(filter.contains(&eq)); } } }