proxygen
SpookyHashV1.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Spooky Hash
18 // A 128-bit noncryptographic hash, for checksums and table lookup
19 // By Bob Jenkins. Public domain.
20 // Oct 31 2010: published framework, disclaimer ShortHash isn't right
21 // Nov 7 2010: disabled ShortHash
22 // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
23 // April 10 2012: buffer overflow on platforms without unaligned reads
24 // July 12 2012: was passing out variables in final to in/out in short
25 // July 30 2012: I reintroduced the buffer overflow
26 
28 
29 #include <folly/CppAttributes.h>
30 
31 #include <cstring>
32 
33 #define ALLOW_UNALIGNED_READS 1
34 
35 namespace folly {
36 namespace hash {
37 
38 // clang-format off
39 
40 //
41 // short hash ... it could be used on any message,
42 // but it's used by Spooky just for short messages.
43 //
45  const void* message,
46  size_t length,
47  uint64_t *hash1,
48  uint64_t *hash2)
49 {
50  uint64_t buf[2*sc_numVars];
51  union
52  {
53  const uint8_t *p8;
54  uint32_t *p32;
55  uint64_t *p64;
56  size_t i;
57  } u;
58 
59  u.p8 = (const uint8_t *)message;
60 
61  if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
62  {
63  memcpy(buf, message, length);
64  u.p64 = buf;
65  }
66 
67  size_t remainder = length%32;
68  uint64_t a=*hash1;
69  uint64_t b=*hash2;
72 
73  if (length > 15)
74  {
75  const uint64_t *end = u.p64 + (length/32)*4;
76 
77  // handle all complete sets of 32 bytes
78  for (; u.p64 < end; u.p64 += 4)
79  {
80  c += u.p64[0];
81  d += u.p64[1];
82  ShortMix(a,b,c,d);
83  a += u.p64[2];
84  b += u.p64[3];
85  }
86 
87  //Handle the case of 16+ remaining bytes.
88  if (remainder >= 16)
89  {
90  c += u.p64[0];
91  d += u.p64[1];
92  ShortMix(a,b,c,d);
93  u.p64 += 2;
94  remainder -= 16;
95  }
96  }
97 
98  // Handle the last 0..15 bytes, and its length
99  d = ((uint64_t)length) << 56;
100  switch (remainder)
101  {
102  case 15:
103  d += ((uint64_t)u.p8[14]) << 48;
105  case 14:
106  d += ((uint64_t)u.p8[13]) << 40;
108  case 13:
109  d += ((uint64_t)u.p8[12]) << 32;
111  case 12:
112  d += u.p32[2];
113  c += u.p64[0];
114  break;
115  case 11:
116  d += ((uint64_t)u.p8[10]) << 16;
118  case 10:
119  d += ((uint64_t)u.p8[9]) << 8;
121  case 9:
122  d += (uint64_t)u.p8[8];
124  case 8:
125  c += u.p64[0];
126  break;
127  case 7:
128  c += ((uint64_t)u.p8[6]) << 48;
130  case 6:
131  c += ((uint64_t)u.p8[5]) << 40;
133  case 5:
134  c += ((uint64_t)u.p8[4]) << 32;
136  case 4:
137  c += u.p32[0];
138  break;
139  case 3:
140  c += ((uint64_t)u.p8[2]) << 16;
142  case 2:
143  c += ((uint64_t)u.p8[1]) << 8;
145  case 1:
146  c += (uint64_t)u.p8[0];
147  break;
148  case 0:
149  c += sc_const;
150  d += sc_const;
151  }
152  ShortEnd(a,b,c,d);
153  *hash1 = a;
154  *hash2 = b;
155 }
156 
157 
158 
159 
160 // do the whole hash in one call
162  const void *message,
163  size_t length,
164  uint64_t *hash1,
165  uint64_t *hash2)
166 {
167  if (length < sc_bufSize)
168  {
169  Short(message, length, hash1, hash2);
170  return;
171  }
172 
173  uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
174  uint64_t buf[sc_numVars];
175  uint64_t *end;
176  union
177  {
178  const uint8_t *p8;
179  uint64_t *p64;
180  size_t i;
181  } u;
182  size_t remainder;
183 
184  h0=h3=h6=h9 = *hash1;
185  h1=h4=h7=h10 = *hash2;
186  h2=h5=h8=h11 = sc_const;
187 
188  u.p8 = (const uint8_t *)message;
189  end = u.p64 + (length/sc_blockSize)*sc_numVars;
190 
191  // handle all whole sc_blockSize blocks of bytes
192  if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
193  {
194  while (u.p64 < end)
195  {
196  Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
197  u.p64 += sc_numVars;
198  }
199  }
200  else
201  {
202  while (u.p64 < end)
203  {
204  memcpy(buf, u.p64, sc_blockSize);
205  Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
206  u.p64 += sc_numVars;
207  }
208  }
209 
210  // handle the last partial block of sc_blockSize bytes
211  remainder = (length - ((const uint8_t *)end-(const uint8_t *)message));
212  memcpy(buf, end, remainder);
213  memset(((uint8_t *)buf)+remainder, 0, sc_blockSize-remainder);
214  ((uint8_t*)buf)[sc_blockSize - 1] = uint8_t(remainder);
215  Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
216 
217  // do some final mixing
218  End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
219  *hash1 = h0;
220  *hash2 = h1;
221 }
222 
223 
224 
225 // init spooky state
227 {
228  m_length = 0;
229  m_remainder = 0;
230  m_state[0] = seed1;
231  m_state[1] = seed2;
232 }
233 
234 
235 // add a message fragment to the state
236 void SpookyHashV1::Update(const void *message, size_t length)
237 {
238  uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
239  size_t newLength = length + m_remainder;
240  uint8_t remainder;
241  union
242  {
243  const uint8_t *p8;
244  uint64_t *p64;
245  size_t i;
246  } u;
247  const uint64_t *end;
248 
249  // Is this message fragment too short? If it is, stuff it away.
250  if (newLength < sc_bufSize)
251  {
252  memcpy(&((uint8_t *)m_data)[m_remainder], message, length);
253  m_length = length + m_length;
254  m_remainder = (uint8_t)newLength;
255  return;
256  }
257 
258  // init the variables
259  if (m_length < sc_bufSize)
260  {
261  h0=h3=h6=h9 = m_state[0];
262  h1=h4=h7=h10 = m_state[1];
263  h2=h5=h8=h11 = sc_const;
264  }
265  else
266  {
267  h0 = m_state[0];
268  h1 = m_state[1];
269  h2 = m_state[2];
270  h3 = m_state[3];
271  h4 = m_state[4];
272  h5 = m_state[5];
273  h6 = m_state[6];
274  h7 = m_state[7];
275  h8 = m_state[8];
276  h9 = m_state[9];
277  h10 = m_state[10];
278  h11 = m_state[11];
279  }
280  m_length = length + m_length;
281 
282  // if we've got anything stuffed away, use it now
283  if (m_remainder)
284  {
286  memcpy(&(((uint8_t *)m_data)[m_remainder]), message, prefix);
287  u.p64 = m_data;
288  Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
289  Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
290  u.p8 = ((const uint8_t *)message) + prefix;
291  length -= prefix;
292  }
293  else
294  {
295  u.p8 = (const uint8_t *)message;
296  }
297 
298  // handle all whole blocks of sc_blockSize bytes
299  end = u.p64 + (length/sc_blockSize)*sc_numVars;
300  remainder = (uint8_t)(length-((const uint8_t *)end-u.p8));
301  if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
302  {
303  while (u.p64 < end)
304  {
305  Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
306  u.p64 += sc_numVars;
307  }
308  }
309  else
310  {
311  while (u.p64 < end)
312  {
313  memcpy(m_data, u.p8, sc_blockSize);
314  Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
315  u.p64 += sc_numVars;
316  }
317  }
318 
319  // stuff away the last few bytes
320  m_remainder = remainder;
321  memcpy(m_data, end, remainder);
322 
323  // stuff away the variables
324  m_state[0] = h0;
325  m_state[1] = h1;
326  m_state[2] = h2;
327  m_state[3] = h3;
328  m_state[4] = h4;
329  m_state[5] = h5;
330  m_state[6] = h6;
331  m_state[7] = h7;
332  m_state[8] = h8;
333  m_state[9] = h9;
334  m_state[10] = h10;
335  m_state[11] = h11;
336 }
337 
338 
339 // report the hash for the concatenation of all message fragments so far
341 {
342  // init the variables
343  if (m_length < sc_bufSize)
344  {
345  *hash1 = m_state[0];
346  *hash2 = m_state[1];
347  Short( m_data, m_length, hash1, hash2);
348  return;
349  }
350 
351  const uint64_t *data = (const uint64_t *)m_data;
352  uint8_t remainder = m_remainder;
353 
354  uint64_t h0 = m_state[0];
355  uint64_t h1 = m_state[1];
356  uint64_t h2 = m_state[2];
357  uint64_t h3 = m_state[3];
358  uint64_t h4 = m_state[4];
359  uint64_t h5 = m_state[5];
360  uint64_t h6 = m_state[6];
361  uint64_t h7 = m_state[7];
362  uint64_t h8 = m_state[8];
363  uint64_t h9 = m_state[9];
364  uint64_t h10 = m_state[10];
365  uint64_t h11 = m_state[11];
366 
367  if (remainder >= sc_blockSize)
368  {
369  // m_data can contain two blocks; handle any whole first block
370  Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
371  data += sc_numVars;
372  remainder -= sc_blockSize;
373  }
374 
375  // mix in the last partial block, and the length mod sc_blockSize
376  memset(&((uint8_t *)data)[remainder], 0, (sc_blockSize-remainder));
377 
378  ((uint8_t *)data)[sc_blockSize-1] = remainder;
379  Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
380 
381  // do some final mixing
382  End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
383 
384  *hash1 = h0;
385  *hash2 = h1;
386 }
387 
388 // clang-format on
389 
390 } // namespace hash
391 } // namespace folly
Definition: test.c:42
static const size_t sc_numVars
Definition: SpookyHashV1.h:281
static void Mix(const uint64_t *data, uint64_t &s0, uint64_t &s1, uint64_t &s2, uint64_t &s3, uint64_t &s4, uint64_t &s5, uint64_t &s6, uint64_t &s7, uint64_t &s8, uint64_t &s9, uint64_t &s10, uint64_t &s11)
Definition: SpookyHashV1.h:141
char b
static const uint64_t sc_const
Definition: SpookyHashV1.h:296
uint64_t m_state[sc_numVars]
Definition: SpookyHashV1.h:299
void Final(uint64_t *hash1, uint64_t *hash2)
static void Hash128(const void *message, size_t length, uint64_t *hash1, uint64_t *hash2)
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
bool prefix(Cursor &c, uint32_t expected)
void Update(const void *message, size_t length)
uint64_t m_data[2 *sc_numVars]
Definition: SpookyHashV1.h:298
void Init(uint64_t seed1, uint64_t seed2)
#define ALLOW_UNALIGNED_READS
static const size_t sc_bufSize
Definition: SpookyHashV1.h:287
auto end(TestAdlIterable &instance)
Definition: ForeachTest.cpp:62
constexpr auto data(C &c) -> decltype(c.data())
Definition: Access.h:71
std::string message
Definition: SPDYCodec.cpp:133
char a
static void ShortEnd(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3)
Definition: SpookyHashV1.h:250
static const size_t sc_blockSize
Definition: SpookyHashV1.h:284
static void Short(const void *message, size_t length, uint64_t *hash1, uint64_t *hash2)
char c
static void ShortMix(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3)
Definition: SpookyHashV1.h:221
#define FOLLY_FALLTHROUGH
Definition: CppAttributes.h:63
static void End(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3, uint64_t &h4, uint64_t &h5, uint64_t &h6, uint64_t &h7, uint64_t &h8, uint64_t &h9, uint64_t &h10, uint64_t &h11)
Definition: SpookyHashV1.h:196