proxygen
PerfectIndexMap.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree. An additional grant
7  * of patent rights can be found in the PATENTS file in the same directory.
8  *
9  */
10 #pragma once
11 
12 #include <folly/FBVector.h>
13 #include <folly/Optional.h>
15 #include <string>
16 
17 // TODO: consider changing API methods that take in an reference with versions
18 // that don't so that callers can give up ownership if they like via std::move
19 // operator (and subsequently we would use it as well).
20 // TODO: templatize the use of string as the value type; can instead use
21 // fbstring
22 
23 namespace proxygen {
24 
25 // A type of key-value map implemented by indexing values in a vector.
26 // Performance is on average that of an unordered_map or better when the
27 // index is used effectively. CPU and memory usage are significantly reduced
28 // due to the use of a perfect hashing function and vectors.
29 
30 // Key is a one byte (i.e. uint8_t) type.
31 // OtherKey represents the value of Key for which strings which
32 // do not possess unique mappings are mapped to.
33 // NoneKey represents the value of Key which no strings map to;
34 // field is reserved to denote deleted elements within the map.
35 // PerfectHashStrToKey represents the perfect hash mapping function used
36 // internally to map string keys to their Key values
37 // AllowDuplicates represents a flag that controls whether the map supports
38 // duplicate keys. It is mainly used for optimization purposes as the
39 // implementation of default handling adds additional useless work for such
40 // cases in which duplicates are not tolerated.
41 template <
42  typename Key,
43  Key OtherKey,
44  Key NoneKey,
45  Key (*PerfectHashStrToKey)(const std::string&),
46  bool AllowDuplicates,
47  bool CaseInsensitive>
49  public:
50  static_assert(sizeof(Key) == 1, "Key must be of size 1 byte.");
52  virtual ~PerfectIndexMap() = default;
53 
54  // Getters into the underlying map.
55 
57  const std::string& keyStr) const {
58  auto key = PerfectHashStrToKey(keyStr);
59  if (key == OtherKey) {
60  const std::string *result = getSingleOtherKey(keyStr);
61  return (
62  result == nullptr ? folly::none : folly::Optional<std::string>(*result)
63  );
64  } else {
65  return getSingleOrNone(key);
66  }
67  }
69  CHECK(key != OtherKey && key != NoneKey);
70  const std::string *result = getSingleKey(key);
71  return (
72  result == nullptr ? folly::none : folly::Optional<std::string>(*result)
73  );
74  }
75 
76  // Adders into the underlying map.
77 
78  void add(const std::string &keyStr, const std::string &value) {
79  CHECK(AllowDuplicates);
80  auto key = PerfectHashStrToKey(keyStr);
81  if (key == OtherKey) {
82  addOtherKeyToIndex(keyStr, value);
83  } else {
84  add(key, value);
85  }
86  }
87 
88  void add(Key key, const std::string &value) {
89  CHECK(AllowDuplicates && key != OtherKey && key != NoneKey);
90  addKeyToIndex(key, value);
91  }
92 
93  // Setters into the underyling map.
94  // Functionally, only one reference to the specified key remains after
95  // the operation completes regardless of the state of AllowDuplicates
96 
97  void set(const std::string &keyStr, const std::string &value) {
98  auto key = PerfectHashStrToKey(keyStr);
99  if (key == OtherKey) {
100  setOtherKey(keyStr, value);
101  } else {
102  set(key, value);
103  }
104  }
105 
106  void set(Key key, const std::string &value) {
107  CHECK(key != OtherKey && key != NoneKey);
108  setKey(key, value);
109  }
110 
111  // Removers from the underlying map.
112  // Functionally, all references to the specified key are removed after the
113  // operation completes, regardless of the state of UnqiueEntries
114 
115  bool remove(const std::string &keyStr) {
116  auto key = PerfectHashStrToKey(keyStr);
117  if (key == OtherKey) {
118  return removeOtherKey(keyStr);
119  } else {
120  return remove(key);
121  }
122  }
123  bool remove(Key key) {
124  CHECK(key != OtherKey && key != NoneKey);
125  return removeKey(key);
126  }
127 
128  size_t size() {
129  return keys_.size() - noneKeyCount_;
130  }
131 
132  private:
133 
134  // Private implementations of public methods for code reuse.
135  // Technically this flow adds more copies (for ex in the case of passing
136  // nullptr around as keyStr when not needed) and so slows things down a bit
137  // but this way the code can effectively be reused and there is a single
138  // implementation source.
139 
140  const std::string * getSingleKey(Key key) const {
141  const Key* data = keys_.data();
142  if (data) {
143  auto offset = searchForKey(key, data);
144  if (data) {
145  if (AllowDuplicates && searchForKey(key, ++data) != -1) {
146  return nullptr;
147  }
148  return &values_[offset];
149  }
150  }
151  return nullptr;
152  }
153  const std::string* getSingleOtherKey(const std::string &keyStr) const {
154  size_t searchIndex = 0;
155  auto index = searchForOtherKey(keyStr, searchIndex);
156  if (index != -1) {
157  if (AllowDuplicates && searchForOtherKey(keyStr, ++searchIndex) != -1) {
158  return nullptr;
159  }
160  return &values_[index];
161  }
162  return nullptr;
163  }
164 
165  void setKey(Key key, const std::string &value) {
166  const Key* data = keys_.data();
167  bool set = false;
168  std::ptrdiff_t offset;
169  while (data) {
170  offset = searchForKey(key, data);
171  if (data) {
172  if (!set) {
173  replaceKeyAtIndex(offset, key, value);
174  if (AllowDuplicates) {
175  set = true;
176  } else {
177  return;
178  }
179  } else {
180  removeAtIndex(offset);
181  }
182  ++data;
183  }
184  }
185  if (!set) {
186  addKeyToIndex(key, value);
187  }
188  }
189  void setOtherKey(const std::string &keyStr, const std::string &value) {
190  bool set = false;
191  size_t searchIndex = 0;
192  while (searchForOtherKey(keyStr, searchIndex) != -1) {
193  if (!set) {
194  replaceOtherKeyAtIndex(searchIndex, keyStr, value);
195  if (AllowDuplicates) {
196  set = true;
197  } else {
198  return;
199  }
200  } else {
201  removeAtIndex(otherKeyNamesKeysIndex_[searchIndex]);
202  }
203  ++searchIndex;
204  }
205  if (!set) {
206  addOtherKeyToIndex(keyStr, value);
207  }
208  }
209 
210  bool removeKey(Key key) {
211  const Key* data = keys_.data();
212  bool anyRemoved = false;
213  std::ptrdiff_t offset;
214  while (data) {
215  offset = searchForKey(key, data);
216  if (data) {
217  removeAtIndex(offset);
218  anyRemoved = true;
219  if (!AllowDuplicates) {
220  break;
221  }
222  ++data;
223  }
224  }
225  return anyRemoved;
226  }
227  bool removeOtherKey(const std::string &keyStr) {
228  bool anyRemoved = false;
229  size_t searchIndex = 0;
230  while (searchForOtherKey(keyStr, searchIndex) != -1) {
231  removeAtIndex(otherKeyNamesKeysIndex_[searchIndex]);
232  anyRemoved = true;
233  if (!AllowDuplicates) {
234  break;
235  }
236  ++searchIndex;
237  }
238  return anyRemoved;
239  }
240 
241  // Utility methods for searching our index. The data pointer / start index
242  // are passed by reference so they can be updated by the search itself. The
243  // methods return on the first occurrence found from the specified start
244  // searching position.
245  std::ptrdiff_t searchForKey(Key key, const Key* &data) const {
246  if (data) {
247  data = (Key*)memchr(
248  (void*)data, key, keys_.size() - (data - keys_.data()));
249  if (data) {
250  return data - keys_.data();
251  }
252  }
253  return -1;
254  }
255  std::ptrdiff_t searchForOtherKey(
256  const std::string &keyStr, size_t &startIndex) const {
257  while (startIndex < otherKeyNamesKeysIndex_.size()) {
258  // The key can only be OtherKey or NoneKey
259  if (keys_[otherKeyNamesKeysIndex_[startIndex]] == OtherKey) {
260  if (CaseInsensitive) {
261  // One might be tempted to merge this statement with that above
262  // but that would be wrong. If CaseInsensitive is true, we do not
263  // want any other check evaluating.
264  if (caseInsensitiveEqual(otherKeyNames_[startIndex], keyStr)) {
265  return otherKeyNamesKeysIndex_[startIndex];
266  }
267  } else {
268  if (otherKeyNames_[startIndex] == keyStr) {
269  return otherKeyNamesKeysIndex_[startIndex];
270  }
271  }
272  }
273  ++startIndex;
274  }
275  return -1;
276  }
277 
278  // Utility methods for adding / modifying to our index.
279  void addKeyToIndex(Key key, const std::string &value) {
280  keys_.push_back(key);
281  values_.emplace_back(value);
282  }
283  void addOtherKeyToIndex(const std::string &keyStr, const std::string &value) {
284  keys_.push_back(OtherKey);
285  otherKeyNames_.emplace_back(keyStr);
286  otherKeyNamesKeysIndex_.push_back(keys_.size() - 1);
287  values_.emplace_back(value);
288  ++otherKeyCount_;
289  }
291  std::ptrdiff_t index, Key key, const std::string &value) {
292  keys_[index] = key;
293  values_[index] = value;
294  }
296  size_t namesIndex, const std::string &keyStr, const std::string &value) {
297  otherKeyNames_[namesIndex] = keyStr;
298  values_[otherKeyNamesKeysIndex_[namesIndex]] = value;
299  }
300 
301  // Utility methods for removing from our index.
302  void removeAtIndex(std::ptrdiff_t index) {
303  CHECK(keys_[index] != NoneKey);
304  if (keys_[index] == OtherKey) {
305  --otherKeyCount_;
306  }
307  keys_[index] = NoneKey;
308  ++noneKeyCount_;
309  // We purposefully omit actually clearing some other data structures
310  // as it is often useful for debugging purposes to leave this data around.
311  }
312 
313  /*
314  * Illustrating the below data structures, for a map where:
315  * otherKeyCount_ == 2
316  * noneKeyCount_ == 1
317  * keys_.size() == 5
318  * using OK = OtherKey
319  * using NK = NoneKey
320  * using K = some key which is not OK or NK
321  * using --- = out of bounds
322  * The above config implies the size of our map is 4, with 2 OKs and 2 Ks.
323  * Also take note from a debugging standpoint, the fact that no entry in
324  * otherKeyNames_ maps back to index 3 (via otherKeyNamesKeysIndex_) suggests
325  * the removed key (NK) was regular K, not an OK, before being removed.
326  *
327  * ------|-------|-------------------------|----------------|-----------------|
328  * Index | keys_ | otherKeyNamesKeysIndex_ | otherKeyNames_ | values_ |
329  * ______|_______|_________________________|________________|_________________|
330  * 0 | OK1 | 0 | <OK1_name> | <OK1_value> |
331  * 1 | K1 | 4 | <OK2_name> | <K1_value> |
332  * 2 | K2 | --- | --- | <K2_value> |
333  * 3 | NK | --- | --- | <NK_value> |
334  * 4 | OK2 | --- | --- | <OK2_value> |
335  * 5 | --- | --- | --- | --- |
336  * ... | ... | ... | ... | ... |
337  * ----------------------------------------------------------------------------
338  */
339 
340  size_t otherKeyCount_{0};
341  size_t noneKeyCount_{0};
342  // And using the above two fields we can obviously infer what the
343  // notOtherKeyCount is.
344 
345  // 1-byte Key vector used as a primary index into values_.
346  // Thus keys_.size() == values_.size().
347  // The total size of the map is thus always <= keys_.size().
348  // The reason it is not always == is because the map supports removals and on
349  // such operations, we do not resize this vector.
351 
352  // A vector used to reverse lookup a particular OtherKey's entry
353  // (in otherKeyNames_) position within keys_.
354  // Strictly speaking: otherKeyNamesKeysIndex_.size() == otherKeyNames_.size().
355  // Stated differently, the N-th OtherKey's name is in otherKeyNames_[N] and
356  // its corresponding value is in values_(namesKeyIndex[N]) because
357  // namesKeyIndex[N] refers to the entry's position in keys_.
359 
360  // Storage for names whose keys are mapped to OtherKey.
361  // This vector can never have more elements than the total numer of OtherKey
362  // entries in the map as it should be equal to this count.
363  // Thus strictly speaking: otherKeyNames_.size() <= keys_.size() where the
364  // only time they could ever be equal is if EVERY entry EVER inserted into
365  // the map was an OtherKey.
367 
368  // Storage for all values, OtherKey or other.
369  // Thus values_.size() == keys_.size().
371 };
372 
373 } // proxygen
bool removeOtherKey(const std::string &keyStr)
bool caseInsensitiveEqual(folly::StringPiece s, folly::StringPiece t)
Definition: UtilInl.h:17
auto add
Definition: BaseTest.cpp:70
internal::KeyMatcher< M > Key(M inner_matcher)
void add(Key key, const std::string &value)
std::ptrdiff_t searchForOtherKey(const std::string &keyStr, size_t &startIndex) const
folly::fbvector< std::string > otherKeyNames_
const std::string * getSingleKey(Key key) const
folly::fbvector< std::string > values_
const std::string * getSingleOtherKey(const std::string &keyStr) const
std::ptrdiff_t searchForKey(Key key, const Key *&data) const
folly::Optional< std::string > getSingleOrNone(Key key) const
void setOtherKey(const std::string &keyStr, const std::string &value)
void replaceKeyAtIndex(std::ptrdiff_t index, Key key, const std::string &value)
void addOtherKeyToIndex(const std::string &keyStr, const std::string &value)
void removeAtIndex(std::ptrdiff_t index)
void add(const std::string &keyStr, const std::string &value)
static const char *const value
Definition: Conv.cpp:50
folly::Optional< std::string > getSingleOrNone(const std::string &keyStr) const
void addKeyToIndex(Key key, const std::string &value)
void setKey(Key key, const std::string &value)
const char * string
Definition: Conv.cpp:212
folly::fbvector< size_t > otherKeyNamesKeysIndex_
void replaceOtherKeyAtIndex(size_t namesIndex, const std::string &keyStr, const std::string &value)
static constexpr uint64_t data[1]
Definition: Fingerprint.cpp:43
folly::fbvector< Key > keys_
constexpr None none
Definition: Optional.h:87