proxygen
Zlib.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/compression/Zlib.h>
18 
19 #if FOLLY_HAVE_LIBZ
20 
21 #include <folly/Conv.h>
22 #include <folly/Optional.h>
23 #include <folly/Range.h>
24 #include <folly/ScopeGuard.h>
27 #include <folly/io/Cursor.h>
28 
31 
32 namespace folly {
33 namespace io {
34 namespace zlib {
35 
36 namespace {
37 
38 bool isValidStrategy(int strategy) {
39  std::array<int, 5> strategies{{
40  Z_DEFAULT_STRATEGY,
41  Z_FILTERED,
42  Z_HUFFMAN_ONLY,
43  Z_RLE,
44  Z_FIXED,
45  }};
46  return std::any_of(strategies.begin(), strategies.end(), [&](int i) {
47  return i == strategy;
48  });
49 }
50 
51 int getWindowBits(Options::Format format, int windowSize) {
52  switch (format) {
53  case Options::Format::ZLIB:
54  return windowSize;
55  case Options::Format::GZIP:
56  return windowSize + 16;
57  case Options::Format::RAW:
58  return -windowSize;
59  case Options::Format::AUTO:
60  return windowSize + 32;
61  default:
62  return windowSize;
63  }
64 }
65 
66 CodecType getCodecType(Options options) {
67  if (options.windowSize == 15 && options.format == Options::Format::ZLIB) {
68  return CodecType::ZLIB;
69  } else if (
70  options.windowSize == 15 && options.format == Options::Format::GZIP) {
71  return CodecType::GZIP;
72  } else {
74  }
75 }
76 
77 class ZlibStreamCodec final : public StreamCodec {
78  public:
79  static std::unique_ptr<Codec> createCodec(Options options, int level);
80  static std::unique_ptr<StreamCodec> createStream(Options options, int level);
81 
82  explicit ZlibStreamCodec(Options options, int level);
83  ~ZlibStreamCodec() override;
84 
85  std::vector<std::string> validPrefixes() const override;
86  bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
87  const override;
88 
89  private:
90  uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
91 
92  void doResetStream() override;
93  bool doCompressStream(
94  ByteRange& input,
96  StreamCodec::FlushOp flush) override;
97  bool doUncompressStream(
98  ByteRange& input,
100  StreamCodec::FlushOp flush) override;
101 
102  void resetDeflateStream();
103  void resetInflateStream();
104 
105  Options options_;
106 
107  Optional<z_stream> deflateStream_{};
108  Optional<z_stream> inflateStream_{};
109  int level_;
110  bool needReset_{true};
111 };
// The two gzip magic bytes, stored as the 16-bit value obtained when they are
// read in little-endian order.
static constexpr uint16_t kGZIPMagicLE{0x8B1F};
113 
114 std::vector<std::string> ZlibStreamCodec::validPrefixes() const {
115  if (type() == CodecType::ZLIB) {
116  // Zlib streams start with a 2 byte header.
117  //
118  // 0 1
119  // +---+---+
120  // |CMF|FLG|
121  // +---+---+
122  //
123  // We won't restrict the values of any sub-fields except as described below.
124  //
125  // The lowest 4 bits of CMF is the compression method (CM).
126  // CM == 0x8 is the deflate compression method, which is currently the only
127  // supported compression method, so any valid prefix must have CM == 0x8.
128  //
129  // The lowest 5 bits of FLG is FCHECK.
130  // FCHECK must be such that the two header bytes are a multiple of 31 when
131  // interpreted as a big endian 16-bit number.
132  std::vector<std::string> result;
133  // 16 values for the first byte, 8 values for the second byte.
134  // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK.
135  result.reserve(132);
136  // Select all values for the CMF byte that use the deflate algorithm 0x8.
137  for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) {
138  // Select all values for the FLG, but leave FCHECK as 0 since it's fixed.
139  for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) {
140  uint16_t prefix = first | second;
141  // Compute FCHECK.
142  prefix += 31 - (prefix % 31);
143  result.push_back(prefixToStringLE(Endian::big(prefix)));
144  // zlib won't produce this, but it is a valid prefix.
145  if ((prefix & 0x1F) == 31) {
146  prefix -= 31;
147  result.push_back(prefixToStringLE(Endian::big(prefix)));
148  }
149  }
150  }
151  return result;
152  } else if (type() == CodecType::GZIP) {
153  // The gzip frame starts with 2 magic bytes.
154  return {prefixToStringLE(kGZIPMagicLE)};
155  } else {
156  return {};
157  }
158 }
159 
160 bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
161  const {
162  if (type() == CodecType::ZLIB) {
163  uint16_t value;
164  Cursor cursor{data};
165  if (!cursor.tryReadBE(value)) {
166  return false;
167  }
168  // zlib compressed if using deflate and is a multiple of 31.
169  return (value & 0x0F00) == 0x0800 && value % 31 == 0;
170  } else if (type() == CodecType::GZIP) {
171  return dataStartsWithLE(data, kGZIPMagicLE);
172  } else {
173  return false;
174  }
175 }
176 
177 uint64_t ZlibStreamCodec::doMaxCompressedLength(
178  uint64_t uncompressedLength) const {
179  // When passed a nullptr, deflateBound() adds 6 bytes for a zlib wrapper. A
180  // gzip wrapper is 18 bytes, so we add the 12 byte difference.
181  return deflateBound(nullptr, uncompressedLength) +
182  (options_.format == Options::Format::GZIP ? 12 : 0);
183 }
184 
185 std::unique_ptr<Codec> ZlibStreamCodec::createCodec(
186  Options options,
187  int level) {
188  return std::make_unique<ZlibStreamCodec>(options, level);
189 }
190 
191 std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream(
192  Options options,
193  int level) {
194  return std::make_unique<ZlibStreamCodec>(options, level);
195 }
196 
// True when `value` lies in the closed interval [low, high].
static bool inBounds(int value, int low, int high) {
  return low <= value && value <= high;
}
200 
201 static int zlibConvertLevel(int level) {
202  switch (level) {
204  return 1;
206  return 6;
208  return 9;
209  }
210  if (!inBounds(level, 0, 9)) {
211  throw std::invalid_argument(
212  to<std::string>("ZlibStreamCodec: invalid level: ", level));
213  }
214  return level;
215 }
216 
217 ZlibStreamCodec::ZlibStreamCodec(Options options, int level)
218  : StreamCodec(
219  getCodecType(options),
220  zlibConvertLevel(level),
221  getCodecType(options) == CodecType::GZIP ? "gzip" : "zlib"),
222  level_(zlibConvertLevel(level)) {
223  options_ = options;
224 
225  // Although zlib allows a windowSize of 8..15, a value of 8 is not
226  // properly supported and is treated as a value of 9. This means data deflated
227  // with windowSize==8 can not be re-inflated with windowSize==8. windowSize==8
228  // is also not supported for gzip and raw deflation.
229  // Hence, the codec supports only 9..15.
230  if (!inBounds(options_.windowSize, 9, 15)) {
231  throw std::invalid_argument(to<std::string>(
232  "ZlibStreamCodec: invalid windowSize option: ", options.windowSize));
233  }
234  if (!inBounds(options_.memLevel, 1, 9)) {
235  throw std::invalid_argument(to<std::string>(
236  "ZlibStreamCodec: invalid memLevel option: ", options.memLevel));
237  }
238  if (!isValidStrategy(options_.strategy)) {
239  throw std::invalid_argument(to<std::string>(
240  "ZlibStreamCodec: invalid strategy: ", options.strategy));
241  }
242 }
243 
244 ZlibStreamCodec::~ZlibStreamCodec() {
245  if (deflateStream_) {
246  deflateEnd(deflateStream_.get_pointer());
247  deflateStream_.clear();
248  }
249  if (inflateStream_) {
250  inflateEnd(inflateStream_.get_pointer());
251  inflateStream_.clear();
252  }
253 }
254 
255 void ZlibStreamCodec::doResetStream() {
256  needReset_ = true;
257 }
258 
259 void ZlibStreamCodec::resetDeflateStream() {
260  if (deflateStream_) {
261  int const rc = deflateReset(deflateStream_.get_pointer());
262  if (rc != Z_OK) {
263  deflateStream_.clear();
264  throw std::runtime_error(
265  to<std::string>("ZlibStreamCodec: deflateReset error: ", rc));
266  }
267  return;
268  }
269  deflateStream_ = z_stream{};
270 
271  // The automatic header detection format is only for inflation.
272  // Use zlib for deflation if the format is auto.
273  int const windowBits = getWindowBits(
274  options_.format == Options::Format::AUTO ? Options::Format::ZLIB
275  : options_.format,
276  options_.windowSize);
277 
278  int const rc = deflateInit2(
279  deflateStream_.get_pointer(),
280  level_,
281  Z_DEFLATED,
282  windowBits,
283  options_.memLevel,
284  options_.strategy);
285  if (rc != Z_OK) {
286  deflateStream_.clear();
287  throw std::runtime_error(
288  to<std::string>("ZlibStreamCodec: deflateInit error: ", rc));
289  }
290 }
291 
292 void ZlibStreamCodec::resetInflateStream() {
293  if (inflateStream_) {
294  int const rc = inflateReset(inflateStream_.get_pointer());
295  if (rc != Z_OK) {
296  inflateStream_.clear();
297  throw std::runtime_error(
298  to<std::string>("ZlibStreamCodec: inflateReset error: ", rc));
299  }
300  return;
301  }
302  inflateStream_ = z_stream{};
303  int const rc = inflateInit2(
304  inflateStream_.get_pointer(),
305  getWindowBits(options_.format, options_.windowSize));
306  if (rc != Z_OK) {
307  inflateStream_.clear();
308  throw std::runtime_error(
309  to<std::string>("ZlibStreamCodec: inflateInit error: ", rc));
310  }
311 }
312 
313 static int zlibTranslateFlush(StreamCodec::FlushOp flush) {
314  switch (flush) {
315  case StreamCodec::FlushOp::NONE:
316  return Z_NO_FLUSH;
317  case StreamCodec::FlushOp::FLUSH:
318  return Z_SYNC_FLUSH;
319  case StreamCodec::FlushOp::END:
320  return Z_FINISH;
321  default:
322  throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
323  }
324 }
325 
326 static int zlibThrowOnError(int rc) {
327  switch (rc) {
328  case Z_OK:
329  case Z_BUF_ERROR:
330  case Z_STREAM_END:
331  return rc;
332  default:
333  throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: ", rc));
334  }
335 }
336 
337 bool ZlibStreamCodec::doCompressStream(
338  ByteRange& input,
340  StreamCodec::FlushOp flush) {
341  if (needReset_) {
342  resetDeflateStream();
343  needReset_ = false;
344  }
345  DCHECK(deflateStream_.hasValue());
346  // zlib will return Z_STREAM_ERROR if output.data() is null.
347  if (output.data() == nullptr) {
348  return false;
349  }
350  deflateStream_->next_in = const_cast<uint8_t*>(input.data());
351  deflateStream_->avail_in = input.size();
352  deflateStream_->next_out = output.data();
353  deflateStream_->avail_out = output.size();
354  SCOPE_EXIT {
355  input.uncheckedAdvance(input.size() - deflateStream_->avail_in);
356  output.uncheckedAdvance(output.size() - deflateStream_->avail_out);
357  };
358  int const rc = zlibThrowOnError(
359  deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush)));
360  switch (flush) {
361  case StreamCodec::FlushOp::NONE:
362  return false;
363  case StreamCodec::FlushOp::FLUSH:
364  return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0;
365  case StreamCodec::FlushOp::END:
366  return rc == Z_STREAM_END;
367  default:
368  throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
369  }
370 }
371 
372 bool ZlibStreamCodec::doUncompressStream(
373  ByteRange& input,
374  MutableByteRange& output,
375  StreamCodec::FlushOp flush) {
376  if (needReset_) {
377  resetInflateStream();
378  needReset_ = false;
379  }
380  DCHECK(inflateStream_.hasValue());
381  // zlib will return Z_STREAM_ERROR if output.data() is null.
382  if (output.data() == nullptr) {
383  return false;
384  }
385  inflateStream_->next_in = const_cast<uint8_t*>(input.data());
386  inflateStream_->avail_in = input.size();
387  inflateStream_->next_out = output.data();
388  inflateStream_->avail_out = output.size();
389  SCOPE_EXIT {
390  input.advance(input.size() - inflateStream_->avail_in);
391  output.advance(output.size() - inflateStream_->avail_out);
392  };
393  int const rc = zlibThrowOnError(
394  inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush)));
395  return rc == Z_STREAM_END;
396 }
397 
398 } // namespace
399 
400 Options defaultGzipOptions() {
401  return Options(Options::Format::GZIP);
402 }
403 
404 Options defaultZlibOptions() {
405  return Options(Options::Format::ZLIB);
406 }
407 
408 std::unique_ptr<Codec> getCodec(Options options, int level) {
409  return ZlibStreamCodec::createCodec(options, level);
410 }
411 
412 std::unique_ptr<StreamCodec> getStreamCodec(Options options, int level) {
413  return ZlibStreamCodec::createStream(options, level);
414 }
415 
416 } // namespace zlib
417 } // namespace io
418 } // namespace folly
419 
420 #endif // FOLLY_HAVE_LIBZ
constexpr int COMPRESSION_LEVEL_DEFAULT
Definition: Compression.h:441
PskType type
#define SCOPE_EXIT
Definition: ScopeGuard.h:274
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
bool prefix(Cursor &c, uint32_t expected)
Range< unsigned char * > MutableByteRange
Definition: Range.h:1164
std::enable_if< std::is_arithmetic< T >::value, std::string >::type prefixToStringLE(T prefix, uint64_t n=sizeof(T))
Definition: Utils.h:54
static T big(T x)
Definition: Bits.h:259
std::unique_ptr< StreamCodec > getStreamCodec(CodecType type, int level)
constexpr auto data(C &c) -> decltype(c.data())
Definition: Access.h:71
constexpr int COMPRESSION_LEVEL_BEST
Definition: Compression.h:442
Range< const unsigned char * > ByteRange
Definition: Range.h:1163
uint64_t value(const typename LockFreeRingBuffer< T, Atom >::Cursor &rbcursor)
Formatter< false, Args... > format(StringPiece fmt, Args &&...args)
Definition: Format.h:271
std::enable_if< std::is_unsigned< T >::value, bool >::type dataStartsWithLE(const IOBuf *data, T prefix, uint64_t n=sizeof(T))
Definition: Utils.h:40
constexpr int COMPRESSION_LEVEL_FASTEST
Definition: Compression.h:440
std::unique_ptr< Codec > getCodec(CodecType type, int level)
constexpr detail::First first
Definition: Base-inl.h:2553