proxygen
SpookyHashV2.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Spooky Hash
18 // A 128-bit noncryptographic hash, for checksums and table lookup
19 // By Bob Jenkins. Public domain.
20 // Oct 31 2010: published framework, disclaimer ShortHash isn't right
21 // Nov 7 2010: disabled ShortHash
22 // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
23 // April 10 2012: buffer overflow on platforms without unaligned reads
24 // July 12 2012: was passing out variables in final to in/out in short
25 // July 30 2012: I reintroduced the buffer overflow
26 // August 5 2012: SpookyV2: d = should be d += in short hash, and remove
27 // extra mix from long hash
28 
30 
31 #include <folly/CppAttributes.h>
32 #include <folly/Portability.h>
33 
34 #include <cstring>
35 
36 namespace folly {
37 namespace hash {
38 
39 // clang-format off
40 
41 //
42 // short hash ... it could be used on any message,
43 // but it's used by Spooky just for short messages.
44 //
46  const void *message,
47  size_t length,
48  uint64_t *hash1,
49  uint64_t *hash2)
50 {
51  uint64_t buf[2*sc_numVars];
52  union
53  {
54  const uint8_t *p8;
55  uint32_t *p32;
56  uint64_t *p64;
57  size_t i;
58  } u;
59 
60  u.p8 = (const uint8_t *)message;
61 
62  if (!kHasUnalignedAccess && (u.i & 0x7))
63  {
64  memcpy(buf, message, length);
65  u.p64 = buf;
66  }
67 
68  size_t remainder = length%32;
69  uint64_t a=*hash1;
70  uint64_t b=*hash2;
73 
74  if (length > 15)
75  {
76  const uint64_t *end = u.p64 + (length/32)*4;
77 
78  // handle all complete sets of 32 bytes
79  for (; u.p64 < end; u.p64 += 4)
80  {
81  c += u.p64[0];
82  d += u.p64[1];
83  ShortMix(a,b,c,d);
84  a += u.p64[2];
85  b += u.p64[3];
86  }
87 
88  //Handle the case of 16+ remaining bytes.
89  if (remainder >= 16)
90  {
91  c += u.p64[0];
92  d += u.p64[1];
93  ShortMix(a,b,c,d);
94  u.p64 += 2;
95  remainder -= 16;
96  }
97  }
98 
99  // Handle the last 0..15 bytes, and its length
100  d += ((uint64_t)length) << 56;
101  switch (remainder)
102  {
103  case 15:
104  d += ((uint64_t)u.p8[14]) << 48;
106  case 14:
107  d += ((uint64_t)u.p8[13]) << 40;
109  case 13:
110  d += ((uint64_t)u.p8[12]) << 32;
112  case 12:
113  d += u.p32[2];
114  c += u.p64[0];
115  break;
116  case 11:
117  d += ((uint64_t)u.p8[10]) << 16;
119  case 10:
120  d += ((uint64_t)u.p8[9]) << 8;
122  case 9:
123  d += (uint64_t)u.p8[8];
125  case 8:
126  c += u.p64[0];
127  break;
128  case 7:
129  c += ((uint64_t)u.p8[6]) << 48;
131  case 6:
132  c += ((uint64_t)u.p8[5]) << 40;
134  case 5:
135  c += ((uint64_t)u.p8[4]) << 32;
137  case 4:
138  c += u.p32[0];
139  break;
140  case 3:
141  c += ((uint64_t)u.p8[2]) << 16;
143  case 2:
144  c += ((uint64_t)u.p8[1]) << 8;
146  case 1:
147  c += (uint64_t)u.p8[0];
148  break;
149  case 0:
150  c += sc_const;
151  d += sc_const;
152  }
153  ShortEnd(a,b,c,d);
154  *hash1 = a;
155  *hash2 = b;
156 }
157 
158 
159 
160 
161 // do the whole hash in one call
163  const void *message,
164  size_t length,
165  uint64_t *hash1,
166  uint64_t *hash2)
167 {
168  if (length < sc_bufSize)
169  {
170  Short(message, length, hash1, hash2);
171  return;
172  }
173 
174  uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
175  uint64_t buf[sc_numVars];
176  uint64_t *end;
177  union
178  {
179  const uint8_t *p8;
180  uint64_t *p64;
181  size_t i;
182  } u;
183  size_t remainder;
184 
185  h0=h3=h6=h9 = *hash1;
186  h1=h4=h7=h10 = *hash2;
187  h2=h5=h8=h11 = sc_const;
188 
189  u.p8 = (const uint8_t *)message;
190  end = u.p64 + (length/sc_blockSize)*sc_numVars;
191 
192  // handle all whole sc_blockSize blocks of bytes
193  if (kHasUnalignedAccess || ((u.i & 0x7) == 0))
194  {
195  while (u.p64 < end)
196  {
197  Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
198  u.p64 += sc_numVars;
199  }
200  }
201  else
202  {
203  while (u.p64 < end)
204  {
205  memcpy(buf, u.p64, sc_blockSize);
206  Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
207  u.p64 += sc_numVars;
208  }
209  }
210 
211  // handle the last partial block of sc_blockSize bytes
212  remainder = (length - ((const uint8_t *)end-(const uint8_t *)message));
213  memcpy(buf, end, remainder);
214  memset(((uint8_t *)buf)+remainder, 0, sc_blockSize-remainder);
215  ((uint8_t*)buf)[sc_blockSize - 1] = uint8_t(remainder);
216 
217  // do some final mixing
218  End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
219  *hash1 = h0;
220  *hash2 = h1;
221 }
222 
223 
224 
225 // init spooky state
227 {
228  m_length = 0;
229  m_remainder = 0;
230  m_state[0] = seed1;
231  m_state[1] = seed2;
232 }
233 
234 
235 // add a message fragment to the state
236 void SpookyHashV2::Update(const void *message, size_t length)
237 {
238  uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
239  size_t newLength = length + m_remainder;
240  uint8_t remainder;
241  union
242  {
243  const uint8_t *p8;
244  uint64_t *p64;
245  size_t i;
246  } u;
247  const uint64_t *end;
248 
249  // Is this message fragment too short? If it is, stuff it away.
250  if (newLength < sc_bufSize)
251  {
252  memcpy(&((uint8_t *)m_data)[m_remainder], message, length);
253  m_length = length + m_length;
254  m_remainder = (uint8_t)newLength;
255  return;
256  }
257 
258  // init the variables
259  if (m_length < sc_bufSize)
260  {
261  h0=h3=h6=h9 = m_state[0];
262  h1=h4=h7=h10 = m_state[1];
263  h2=h5=h8=h11 = sc_const;
264  }
265  else
266  {
267  h0 = m_state[0];
268  h1 = m_state[1];
269  h2 = m_state[2];
270  h3 = m_state[3];
271  h4 = m_state[4];
272  h5 = m_state[5];
273  h6 = m_state[6];
274  h7 = m_state[7];
275  h8 = m_state[8];
276  h9 = m_state[9];
277  h10 = m_state[10];
278  h11 = m_state[11];
279  }
280  m_length = length + m_length;
281 
282  // if we've got anything stuffed away, use it now
283  if (m_remainder)
284  {
286  memcpy(&(((uint8_t *)m_data)[m_remainder]), message, prefix);
287  u.p64 = m_data;
288  Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
289  Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
290  u.p8 = ((const uint8_t *)message) + prefix;
291  length -= prefix;
292  }
293  else
294  {
295  u.p8 = (const uint8_t *)message;
296  }
297 
298  // handle all whole blocks of sc_blockSize bytes
299  end = u.p64 + (length/sc_blockSize)*sc_numVars;
300  remainder = (uint8_t)(length-((const uint8_t *)end-u.p8));
301  if (kHasUnalignedAccess || (u.i & 0x7) == 0)
302  {
303  while (u.p64 < end)
304  {
305  Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
306  u.p64 += sc_numVars;
307  }
308  }
309  else
310  {
311  while (u.p64 < end)
312  {
313  memcpy(m_data, u.p8, sc_blockSize);
314  Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
315  u.p64 += sc_numVars;
316  }
317  }
318 
319  // stuff away the last few bytes
320  m_remainder = remainder;
321  memcpy(m_data, end, remainder);
322 
323  // stuff away the variables
324  m_state[0] = h0;
325  m_state[1] = h1;
326  m_state[2] = h2;
327  m_state[3] = h3;
328  m_state[4] = h4;
329  m_state[5] = h5;
330  m_state[6] = h6;
331  m_state[7] = h7;
332  m_state[8] = h8;
333  m_state[9] = h9;
334  m_state[10] = h10;
335  m_state[11] = h11;
336 }
337 
338 
339 // report the hash for the concatenation of all message fragments so far
340 void SpookyHashV2::Final(uint64_t *hash1, uint64_t *hash2) const
341 {
342  // init the variables
343  if (m_length < sc_bufSize)
344  {
345  *hash1 = m_state[0];
346  *hash2 = m_state[1];
347  Short( m_data, m_length, hash1, hash2);
348  return;
349  }
350 
351  uint64_t buf[2*sc_numVars];
352  memcpy(buf, m_data, sizeof(buf));
353  uint64_t *data = buf;
354  uint8_t remainder = m_remainder;
355 
356  uint64_t h0 = m_state[0];
357  uint64_t h1 = m_state[1];
358  uint64_t h2 = m_state[2];
359  uint64_t h3 = m_state[3];
360  uint64_t h4 = m_state[4];
361  uint64_t h5 = m_state[5];
362  uint64_t h6 = m_state[6];
363  uint64_t h7 = m_state[7];
364  uint64_t h8 = m_state[8];
365  uint64_t h9 = m_state[9];
366  uint64_t h10 = m_state[10];
367  uint64_t h11 = m_state[11];
368 
369  if (remainder >= sc_blockSize)
370  {
371  // m_data can contain two blocks; handle any whole first block
372  Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
373  data += sc_numVars;
374  remainder -= sc_blockSize;
375  }
376 
377  // mix in the last partial block, and the length mod sc_blockSize
378  memset(&((uint8_t *)data)[remainder], 0, (sc_blockSize-remainder));
379 
380  ((uint8_t *)data)[sc_blockSize-1] = remainder;
381 
382  // do some final mixing
383  End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
384 
385  *hash1 = h0;
386  *hash2 = h1;
387 }
388 
389 // clang-format on
390 
391 } // namespace hash
392 } // namespace folly
constexpr bool kHasUnalignedAccess
Definition: Portability.h:29
static void Short(const void *message, size_t length, uint64_t *hash1, uint64_t *hash2)
Definition: test.c:42
char b
static void ShortEnd(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3)
Definition: SpookyHashV2.h:255
static void Mix(const uint64_t *data, uint64_t &s0, uint64_t &s1, uint64_t &s2, uint64_t &s3, uint64_t &s4, uint64_t &s5, uint64_t &s6, uint64_t &s7, uint64_t &s8, uint64_t &s9, uint64_t &s10, uint64_t &s11)
Definition: SpookyHashV2.h:142
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
void Init(uint64_t seed1, uint64_t seed2)
bool prefix(Cursor &c, uint32_t expected)
uint64_t m_state[sc_numVars]
Definition: SpookyHashV2.h:304
static constexpr size_t sc_blockSize
Definition: SpookyHashV2.h:289
static void Hash128(const void *message, size_t length, uint64_t *hash1, uint64_t *hash2)
auto end(TestAdlIterable &instance)
Definition: ForeachTest.cpp:62
constexpr auto data(C &c) -> decltype(c.data())
Definition: Access.h:71
std::string message
Definition: SPDYCodec.cpp:133
char a
static constexpr size_t sc_bufSize
Definition: SpookyHashV2.h:292
void Final(uint64_t *hash1, uint64_t *hash2) const
static constexpr size_t sc_numVars
Definition: SpookyHashV2.h:286
static void ShortMix(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3)
Definition: SpookyHashV2.h:226
void Update(const void *message, size_t length)
char c
static void End(const uint64_t *data, uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3, uint64_t &h4, uint64_t &h5, uint64_t &h6, uint64_t &h7, uint64_t &h8, uint64_t &h9, uint64_t &h10, uint64_t &h11)
Definition: SpookyHashV2.h:197
#define FOLLY_FALLTHROUGH
Definition: CppAttributes.h:63
uint64_t m_data[2 *sc_numVars]
Definition: SpookyHashV2.h:303
static constexpr uint64_t sc_const
Definition: SpookyHashV2.h:301