//! A slow, but clear reference implementation of SeaHash. //! //! # Specification //! //! The input buffer is padded with null bytes until the length is divisible by 8. //! //! We start out with state //! //! ```notest //! a = 0x16f11fe89b0d677c //! b = 0xb480a793d8e6c86c //! c = 0x6fe2e5aaf078ebc9 //! d = 0x14f994a4c5259381 //! ``` //! //! If a seed is given, each of the initial state component are modularly multiplied by the seed. //! //! From the stream, we read one 64-bit block (in little-endian) at a time. This number, `n`, //! determines the new state by: //! //! ```notest //! a' = b //! b' = c //! c' = d //! d' = g(a ⊕ n) //! ``` //! //! `g(x)` is defined as `g(x) = j(h(j(x)))` with `h(x) = (x ≫ 32) ≫ (x ≫ 60)` and `j(x) ≡ px (mod //! 2^64)` with `p = 0x7ed0e9fa0d94a33`. //! //! Let the final state be `(x, y, z, w)`. Then the final result is given by `H = g(x ⊕ y ⊕ z ⊕ w ⊕ //! l)` where `l` is the number of bytes in the original buffer. use helper; /// Read an integer in little-endian. fn read_int(int: &[u8]) -> u64 { debug_assert!( int.len() <= 8, "The buffer length of the integer must be less than or equal to \ the one of an u64." ); // Start at 0. let mut x = 0; for &i in int.iter().rev() { // Shift up a byte. x <<= 8; // Set the lower byte. x |= i as u64; } x } /// A hash state. struct State { /// The `a` substate. a: u64, /// The `b` substate. b: u64, /// The `c` substate. c: u64, /// The `d` substate. d: u64, } impl State { /// Write a 64-bit integer to the state. fn write_u64(&mut self, x: u64) { let mut a = self.a; // Mix `x` into `a`. a = helper::diffuse(a ^ x); // Rotate around. // _______________________ // | v // a <---- b <---- c <---- d self.a = self.b; self.b = self.c; self.c = self.d; self.d = a; } /// Calculate the final hash. fn finish(self, total: usize) -> u64 { // Even though XORing is commutative, it doesn't matter, because the state vector's initial // components are mutually distinct, and thus swapping even and odd chunks will affect the // result, because it is sensitive to the initial condition. To add discreteness, we // diffuse. helper::diffuse( self.a ^ self.b ^ self.c ^ self.d // We XOR in the number of written bytes to make it zero-sensitive when excessive bytes // are written (0u32.0u8 ≠ 0u16.0u8). ^ total as u64, ) } /// Create a new state with some initial values (seed). fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> State { State { // These values are randomly generated. a: k1, b: k2, c: k3, d: k4, } } } /// A reference implementation of SeaHash. /// /// This is bloody slow when compared to the optimized version. This is because SeaHash was /// specifically designed to take all sorts of hardware and software hacks into account to achieve /// maximal performance, but this makes code significantly less readable. As such, this version has /// only one goal: to make the algorithm readable and understandable. pub fn hash(buf: &[u8]) -> u64 { hash_seeded( buf, 0x16f11fe89b0d677c, 0xb480a793d8e6c86c, 0x6fe2e5aaf078ebc9, 0x14f994a4c5259381, ) } /// The seeded version of the reference implementation. pub fn hash_seeded(buf: &[u8], k1: u64, k2: u64, k3: u64, k4: u64) -> u64 { // Initialize the state. let mut state = State::with_seeds(k1, k2, k3, k4); // Partition the rounded down buffer into chunks of 8 bytes, and iterate over them. The last // block might not be 8 bytes long. for int in buf.chunks(8) { // Read the chunk into an integer and write into the state. state.write_u64(read_int(int)); } // Finish the hash state and return the final value. state.finish(buf.len()) } #[cfg(test)] mod tests { use super::*; #[test] fn shakespear() { assert_eq!(hash(b"to be or not to be"), 1988685042348123509); } }