proxygen
Huffman.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree. An additional grant
7  * of patent rights can be found in the PATENTS file in the same directory.
8  *
9  */
11 
12 #include <folly/Indestructible.h>
14 
15 using folly::IOBuf;
16 using std::pair;
17 using std::string;
18 
19 namespace proxygen { namespace huffman {
20 
21 HuffTree::HuffTree(const uint32_t* codes, const uint8_t* bits)
22  : codes_(codes), bits_(bits) {
23  buildTree();
24 }
25 
27  codes_(tree.codes_), bits_(tree.bits_) {
28  buildTree();
29 }
30 
32  folly::fbstring& literal)
33  const {
34  const SuperHuffNode* snode = &table_[0];
35  uint32_t w = 0;
36  uint32_t wbits = 0;
37  uint32_t i = 0;
38  while (i < size || wbits > 0) {
39  // decide if we need to load more bits using an 8-bit chunk
40  if (i < size && wbits < 8) {
41  w = (w << 8) | buf[i];
42  wbits += 8;
43  i++;
44  }
45  // key is used for performing the indexed lookup
46  uint32_t key;
47  if (wbits >= 8) {
48  key = w >> (wbits - 8);
49  } else {
50  // this the case we're at the end of the buffer
51  uint8_t xbits = 8 - wbits;
52  w = (w << xbits) | ((1 << xbits) - 1);
53  key = w;
54  wbits = 8;
55  }
56  // perform the indexed lookup
57  const HuffNode& node = snode->index[key];
58  if (node.isLeaf()) {
59  // final node, we can emit the character
60  literal.push_back(node.data.ch);
61  wbits -= node.metadata.bits;
62  snode = &table_[0];
63  } else {
64  // this is a branch, so we just need to move one level
65  wbits -= 8;
66  snode = &table_[node.data.superNodeIndex];
67  }
68  // remove what we've just used
69  w = w & ((1 << wbits) - 1);
70  }
71  return true;
72 }
73 
79  SuperHuffNode* snode = &table_[0];
80  while (bits > 8) {
81  uint32_t mask = 0xFF << (bits - 8);
82  uint32_t x = (code & mask) >> (bits - 8);
83  // mark this node as branch
84  if (snode->index[x].isLeaf()) {
85  nodes_++;
86  HuffNode& node = snode->index[x];
87  node.metadata.isSuperNode = true;
88  node.data.superNodeIndex = nodes_;
89  }
90  snode = &table_[snode->index[x].data.superNodeIndex];
91  bits -= 8;
92  code = code & ~mask;
93  }
94  // fill the node with all the suffixes
95  fillIndex(*snode, code, bits, ch, bits);
96 }
97 
99  return codes_;
100 }
101 
102 const uint8_t* HuffTree::bitsTable() const {
103  return bits_;
104 }
105 
110  uint8_t ch, uint8_t level) {
111  if (level == 8) {
112  snode.index[code].data.ch = ch;
113  snode.index[code].metadata.bits = bits;
114  return;
115  }
116  // generate the bit at the current level
117  code = code << 1;
118  for (uint8_t bit = 0; bit <= 1; bit++) {
119  fillIndex(snode, code | bit, bits, ch, level + 1);
120  }
121 }
122 
127  // create the indexed table
128  for (uint32_t i = 0; i < kTableSize; i++) {
129  insert(codes_[i], bits_[i], i);
130  }
131 }
132 
134  folly::io::QueueAppender& buf) const {
135  uint32_t code; // the huffman code of a given character
136  uint8_t bits; // on how many bits code is represented
137  uint32_t w = 0; // 4-byte word used for packing bits and write it to memory
138  uint8_t wbits = 0; // how many bits we have in 'w'
139  uint32_t totalBytes = 0;
140  for (size_t i = 0; i < literal.size(); i++) {
141  uint8_t ch = literal[i];
142  code = codes_[ch];
143  bits = bits_[ch];
144 
145  if (wbits + bits < 32) {
146  w = (w << bits) | code;
147  wbits += bits;
148  } else {
149  uint8_t xbits = wbits + bits - 32;
150  w = (w << (bits - xbits)) | (code >> xbits);
151  // write the word into the buffer by converting to network order, which
152  // takes care of the endianness problems
153  buf.writeBE<uint32_t>(w);
154  totalBytes += 4;
155  // carry for next batch
156  wbits = xbits;
157  w = code & ((1 << xbits) - 1);
158  }
159  }
160  // we might have some padding at the byte level
161  if (wbits & 0x7) {
162  // padding bits
163  uint8_t padbits = 8 - (wbits & 0x7);
164  w = (w << padbits) | ((1 << padbits) - 1);
165 
166  wbits += padbits;
167  }
168  // we need to write the leftover bytes, from 1 to 4 bytes
169  if (wbits > 0) {
170  uint8_t bytes = wbits >> 3;
171  // align the bits to the MSB
172  w = w << (32 - wbits);
173  // set the bytes in the network order and copy w[0], w[1]...
174  w = htonl(w);
175  // we need to use memcpy because we might write less than 4 bytes
176  buf.push((uint8_t*)&w, bytes);
177  totalBytes += bytes;
178  }
179  return totalBytes;
180 }
181 
183  uint32_t totalBits = 0;
184  for (size_t i = 0; i < literal.size(); i++) {
185  // we just need the number of bits
186  uint8_t ch = literal[i];
187  totalBits += bits_[ch];
188  }
189  uint32_t size = totalBits >> 3;
190  if (totalBits & 0x07) {
191  ++size;
192  }
193  return size;
194 }
195 
197  return std::make_pair(codes_[ch], bits_[ch]);
198 }
199 
200 //http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09#appendix-C
202  0x1ff8, 0x7fffd8, 0xfffffe2, 0xfffffe3, 0xfffffe4, 0xfffffe5, 0xfffffe6,
203  0xfffffe7, 0xfffffe8, 0xffffea, 0x3ffffffc, 0xfffffe9, 0xfffffea,
204  0x3ffffffd, 0xfffffeb, 0xfffffec, 0xfffffed, 0xfffffee, 0xfffffef,
205  0xffffff0, 0xffffff1, 0xffffff2, 0x3ffffffe, 0xffffff3, 0xffffff4,
206  0xffffff5, 0xffffff6, 0xffffff7, 0xffffff8, 0xffffff9, 0xffffffa, 0xffffffb,
207  0x14, 0x3f8, 0x3f9, 0xffa, 0x1ff9, 0x15, 0xf8, 0x7fa, 0x3fa, 0x3fb, 0xf9,
208  0x7fb, 0xfa, 0x16, 0x17, 0x18, 0x0, 0x1, 0x2, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
209  0x1e, 0x1f, 0x5c, 0xfb, 0x7ffc, 0x20, 0xffb, 0x3fc, 0x1ffa, 0x21, 0x5d,
210  0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
211  0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xfc, 0x73, 0xfd,
212  0x1ffb, 0x7fff0, 0x1ffc, 0x3ffc, 0x22, 0x7ffd, 0x3, 0x23, 0x4, 0x24, 0x5,
213  0x25, 0x26, 0x27, 0x6, 0x74, 0x75, 0x28, 0x29, 0x2a, 0x7, 0x2b, 0x76, 0x2c,
214  0x8, 0x9, 0x2d, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7ffe, 0x7fc, 0x3ffd, 0x1ffd,
215  0xffffffc, 0xfffe6, 0x3fffd2, 0xfffe7, 0xfffe8, 0x3fffd3, 0x3fffd4,
216  0x3fffd5, 0x7fffd9, 0x3fffd6, 0x7fffda, 0x7fffdb, 0x7fffdc, 0x7fffdd,
217  0x7fffde, 0xffffeb, 0x7fffdf, 0xffffec, 0xffffed, 0x3fffd7, 0x7fffe0,
218  0xffffee, 0x7fffe1, 0x7fffe2, 0x7fffe3, 0x7fffe4, 0x1fffdc, 0x3fffd8,
219  0x7fffe5, 0x3fffd9, 0x7fffe6, 0x7fffe7, 0xffffef, 0x3fffda, 0x1fffdd,
220  0xfffe9, 0x3fffdb, 0x3fffdc, 0x7fffe8, 0x7fffe9, 0x1fffde, 0x7fffea,
221  0x3fffdd, 0x3fffde, 0xfffff0, 0x1fffdf, 0x3fffdf, 0x7fffeb, 0x7fffec,
222  0x1fffe0, 0x1fffe1, 0x3fffe0, 0x1fffe2, 0x7fffed, 0x3fffe1, 0x7fffee,
223  0x7fffef, 0xfffea, 0x3fffe2, 0x3fffe3, 0x3fffe4, 0x7ffff0, 0x3fffe5,
224  0x3fffe6, 0x7ffff1, 0x3ffffe0, 0x3ffffe1, 0xfffeb, 0x7fff1, 0x3fffe7,
225  0x7ffff2, 0x3fffe8, 0x1ffffec, 0x3ffffe2, 0x3ffffe3, 0x3ffffe4, 0x7ffffde,
226  0x7ffffdf, 0x3ffffe5, 0xfffff1, 0x1ffffed, 0x7fff2, 0x1fffe3, 0x3ffffe6,
227  0x7ffffe0, 0x7ffffe1, 0x3ffffe7, 0x7ffffe2, 0xfffff2, 0x1fffe4, 0x1fffe5,
228  0x3ffffe8, 0x3ffffe9, 0xffffffd, 0x7ffffe3, 0x7ffffe4, 0x7ffffe5, 0xfffec,
229  0xfffff3, 0xfffed, 0x1fffe6, 0x3fffe9, 0x1fffe7, 0x1fffe8, 0x7ffff3,
230  0x3fffea, 0x3fffeb, 0x1ffffee, 0x1ffffef, 0xfffff4, 0xfffff5, 0x3ffffea,
231  0x7ffff4, 0x3ffffeb, 0x7ffffe6, 0x3ffffec, 0x3ffffed, 0x7ffffe7, 0x7ffffe8,
232  0x7ffffe9, 0x7ffffea, 0x7ffffeb, 0xffffffe, 0x7ffffec, 0x7ffffed, 0x7ffffee,
233  0x7ffffef, 0x7fffff0, 0x3ffffee
234 };
235 
237  13, 23, 28, 28, 28, 28, 28, 28, 28, 24, 30, 28, 28, 30, 28, 28, 28, 28, 28,
238  28, 28, 28, 30, 28, 28, 28, 28, 28, 28, 28, 28, 28, 6, 10, 10, 12, 13, 6, 8,
239  11, 10, 10, 8, 11, 8, 6, 6, 6, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 8, 15, 6,
240  12, 10, 13, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
241  7, 7, 8, 7, 8, 13, 19, 13, 14, 6, 15, 5, 6, 5, 6, 5, 6, 6, 6, 5, 7, 7, 6, 6,
242  6, 5, 6, 7, 6, 5, 5, 6, 7, 7, 7, 7, 7, 15, 11, 14, 13, 28, 20, 22, 20, 20,
243  22, 22, 22, 23, 22, 23, 23, 23, 23, 23, 24, 23, 24, 24, 22, 23, 24, 23, 23,
244  23, 23, 21, 22, 23, 22, 23, 23, 24, 22, 21, 20, 22, 22, 23, 23, 21, 23, 22,
245  22, 24, 21, 22, 23, 23, 21, 21, 22, 21, 23, 22, 23, 23, 20, 22, 22, 22, 23,
246  22, 22, 23, 26, 26, 20, 19, 22, 23, 22, 25, 26, 26, 26, 27, 27, 26, 24, 25,
247  19, 21, 26, 27, 27, 26, 27, 24, 21, 21, 26, 26, 28, 27, 27, 27, 20, 24, 20,
248  21, 22, 21, 21, 23, 22, 22, 25, 25, 24, 24, 26, 23, 26, 27, 26, 26, 27, 27,
249  27, 27, 27, 28, 27, 27, 27, 27, 27, 26
250 };
251 
252 const HuffTree& huffTree() {
254  HuffTree{s_codesTable, s_bitsTable}
255  };
256  return *huffTree;
257 }
258 
259 }}
Definition: InvokeTest.cpp:58
void insert(uint32_t code, uint8_t bits, uint8_t ch)
Definition: Huffman.cpp:78
const uint32_t s_codesTable[kTableSize]
Definition: Huffman.cpp:201
const uint8_t * bits_
Definition: Huffman.h:148
const int x
constexpr size_type size() const
Definition: Range.h:431
void push(const uint8_t *buf, size_t len)
Definition: Cursor.h:755
uint32_t encode(folly::StringPiece literal, folly::io::QueueAppender &buf) const
Definition: Huffman.cpp:133
auto ch
struct proxygen::huffman::HuffNode::@104 metadata
const uint8_t * bitsTable() const
Definition: Huffman.cpp:102
constexpr auto size(C const &c) -> decltype(c.size())
Definition: Access.h:45
union proxygen::huffman::HuffNode::@103 data
uint32_t getEncodeSize(folly::StringPiece literal) const
Definition: Huffman.cpp:182
const uint8_t s_bitsTable[kTableSize]
Definition: Huffman.cpp:236
bool isLeaf() const
Definition: Huffman.h:42
void fillIndex(SuperHuffNode &snode, uint32_t code, uint8_t bits, uint8_t ch, uint8_t level)
Definition: Huffman.cpp:109
void push_back(const value_type c)
Definition: FBString.h:1437
const uint32_t * codesTable() const
Definition: Huffman.cpp:98
const uint32_t * codes_
Definition: Huffman.h:147
const char * string
Definition: Conv.cpp:212
SuperHuffNode table_[46]
Definition: Huffman.h:152
Definition: Traits.h:577
const HuffTree & huffTree()
Definition: Huffman.cpp:252
HuffTree(const uint32_t *codes, const uint8_t *bits)
Definition: Huffman.cpp:21
bool decode(const uint8_t *buf, uint32_t size, folly::fbstring &literal) const
Definition: Huffman.cpp:31
const uint32_t kTableSize
Definition: Huffman.h:21
std::pair< uint32_t, uint8_t > getCode(uint8_t ch) const
Definition: Huffman.cpp:196
void writeBE(T value)
Definition: Cursor.h:744