proxygen
json.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2011-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <folly/json.h>
17 
18 #include <algorithm>
19 #include <functional>
20 #include <iterator>
21 #include <type_traits>
22 
23 #include <boost/algorithm/string.hpp>
24 
25 #include <folly/Conv.h>
26 #include <folly/Portability.h>
27 #include <folly/Range.h>
28 #include <folly/String.h>
29 #include <folly/Unicode.h>
30 #include <folly/lang/Bits.h>
32 
33 namespace folly {
34 
36 
37 namespace json {
38 namespace {
39 
40 struct Printer {
41  explicit Printer(
42  std::string& out,
43  unsigned* indentLevel,
44  serialization_opts const* opts)
45  : out_(out), indentLevel_(indentLevel), opts_(*opts) {}
46 
47  void operator()(dynamic const& v) const {
48  switch (v.type()) {
49  case dynamic::DOUBLE:
50  if (!opts_.allow_nan_inf &&
51  (std::isnan(v.asDouble()) || std::isinf(v.asDouble()))) {
52  throw std::runtime_error(
53  "folly::toJson: JSON object value was a "
54  "NaN or INF");
55  }
56  toAppend(
57  v.asDouble(), &out_, opts_.double_mode, opts_.double_num_digits);
58  break;
59  case dynamic::INT64: {
60  auto intval = v.asInt();
61  if (opts_.javascript_safe) {
62  // Use folly::to to check that this integer can be represented
63  // as a double without loss of precision.
64  intval = int64_t(to<double>(intval));
65  }
66  toAppend(intval, &out_);
67  break;
68  }
69  case dynamic::BOOL:
70  out_ += v.asBool() ? "true" : "false";
71  break;
72  case dynamic::NULLT:
73  out_ += "null";
74  break;
75  case dynamic::STRING:
77  break;
78  case dynamic::OBJECT:
79  printObject(v);
80  break;
81  case dynamic::ARRAY:
82  printArray(v);
83  break;
84  default:
85  CHECK(0) << "Bad type " << v.type();
86  }
87  }
88 
89  private:
90  void printKV(const std::pair<const dynamic, dynamic>& p) const {
91  if (!opts_.allow_non_string_keys && !p.first.isString()) {
92  throw std::runtime_error(
93  "folly::toJson: JSON object key was not a "
94  "string");
95  }
96  (*this)(p.first);
97  mapColon();
98  (*this)(p.second);
99  }
100 
101  template <typename Iterator>
102  void printKVPairs(Iterator begin, Iterator end) const {
103  printKV(*begin);
104  for (++begin; begin != end; ++begin) {
105  out_ += ',';
106  newline();
107  printKV(*begin);
108  }
109  }
110 
111  void printObject(dynamic const& o) const {
112  if (o.empty()) {
113  out_ += "{}";
114  return;
115  }
116 
117  out_ += '{';
118  indent();
119  newline();
120  if (opts_.sort_keys || opts_.sort_keys_by) {
121  using ref = std::reference_wrapper<decltype(o.items())::value_type const>;
122  std::vector<ref> refs(o.items().begin(), o.items().end());
123 
125  auto const& sort_keys_by = opts_.sort_keys_by
126  ? SortByRef(opts_.sort_keys_by)
127  : SortByRef(std::less<dynamic>());
128  std::sort(refs.begin(), refs.end(), [&](ref a, ref b) {
129  // Only compare keys. No ordering among identical keys.
130  return sort_keys_by(a.get().first, b.get().first);
131  });
132  printKVPairs(refs.cbegin(), refs.cend());
133  } else {
134  printKVPairs(o.items().begin(), o.items().end());
135  }
136  outdent();
137  newline();
138  out_ += '}';
139  }
140 
141  void printArray(dynamic const& a) const {
142  if (a.empty()) {
143  out_ += "[]";
144  return;
145  }
146 
147  out_ += '[';
148  indent();
149  newline();
150  (*this)(a[0]);
151  for (auto& val : range(std::next(a.begin()), a.end())) {
152  out_ += ',';
153  newline();
154  (*this)(val);
155  }
156  outdent();
157  newline();
158  out_ += ']';
159  }
160 
161  private:
162  void outdent() const {
163  if (indentLevel_) {
164  --*indentLevel_;
165  }
166  }
167 
168  void indent() const {
169  if (indentLevel_) {
170  ++*indentLevel_;
171  }
172  }
173 
174  void newline() const {
175  if (indentLevel_) {
176  out_ += to<std::string>('\n', std::string(*indentLevel_ * 2, ' '));
177  }
178  }
179 
180  void mapColon() const {
181  out_ += indentLevel_ ? ": " : ":";
182  }
183 
184  private:
186  unsigned* const indentLevel_;
187  serialization_opts const& opts_;
188 };
189 
191 
192 struct FOLLY_EXPORT ParseError : std::runtime_error {
193  explicit ParseError(
194  unsigned int line,
195  std::string const& context,
196  std::string const& expected)
197  : std::runtime_error(to<std::string>(
198  "json parse error on line ",
199  line,
200  !context.empty() ? to<std::string>(" near `", context, '\'') : "",
201  ": ",
202  expected)) {}
203 };
204 
205 // Wraps our input buffer with some helper functions.
206 struct Input {
207  explicit Input(StringPiece range, json::serialization_opts const* opts)
208  : range_(range), opts_(*opts), lineNum_(0) {
209  storeCurrent();
210  }
211 
212  Input(Input const&) = delete;
213  Input& operator=(Input const&) = delete;
214 
215  char const* begin() const {
216  return range_.begin();
217  }
218 
219  // Parse ahead for as long as the supplied predicate is satisfied,
220  // returning a range of what was skipped.
221  template <class Predicate>
222  StringPiece skipWhile(const Predicate& p) {
223  std::size_t skipped = 0;
224  for (; skipped < range_.size(); ++skipped) {
225  if (!p(range_[skipped])) {
226  break;
227  }
228  if (range_[skipped] == '\n') {
229  ++lineNum_;
230  }
231  }
232  auto ret = range_.subpiece(0, skipped);
233  range_.advance(skipped);
234  storeCurrent();
235  return ret;
236  }
237 
238  StringPiece skipDigits() {
239  return skipWhile([](char c) { return c >= '0' && c <= '9'; });
240  }
241 
242  StringPiece skipMinusAndDigits() {
243  bool firstChar = true;
244  return skipWhile([&firstChar](char c) {
245  bool result = (c >= '0' && c <= '9') || (firstChar && c == '-');
246  firstChar = false;
247  return result;
248  });
249  }
250 
251  void skipWhitespace() {
253  storeCurrent();
254  }
255 
256  void expect(char c) {
257  if (**this != c) {
258  throw ParseError(
259  lineNum_, context(), to<std::string>("expected '", c, '\''));
260  }
261  ++*this;
262  }
263 
264  std::size_t size() const {
265  return range_.size();
266  }
267 
268  int operator*() const {
269  return current_;
270  }
271 
272  void operator++() {
273  range_.pop_front();
274  storeCurrent();
275  }
276 
277  template <class T>
278  T extract() {
279  try {
280  return to<T>(&range_);
281  } catch (std::exception const& e) {
282  error(e.what());
283  }
284  }
285 
286  bool consume(StringPiece str) {
287  if (boost::starts_with(range_, str)) {
288  range_.advance(str.size());
289  storeCurrent();
290  return true;
291  }
292  return false;
293  }
294 
295  std::string context() const {
296  return range_.subpiece(0, 16 /* arbitrary */).toString();
297  }
298 
299  dynamic error(char const* what) const {
300  throw ParseError(lineNum_, context(), what);
301  }
302 
303  json::serialization_opts const& getOpts() {
304  return opts_;
305  }
306 
307  void incrementRecursionLevel() {
308  if (currentRecursionLevel_ > opts_.recursion_limit) {
309  error("recursion limit exceeded");
310  }
312  }
313 
314  void decrementRecursionLevel() {
316  }
317 
318  private:
319  void storeCurrent() {
320  current_ = range_.empty() ? EOF : range_.front();
321  }
322 
323  private:
325  json::serialization_opts const& opts_;
326  unsigned lineNum_;
327  int current_;
328  unsigned int currentRecursionLevel_{0};
329 };
330 
331 class RecursionGuard {
332  public:
333  explicit RecursionGuard(Input& in) : in_(in) {
334  in_.incrementRecursionLevel();
335  }
336 
337  ~RecursionGuard() {
338  in_.decrementRecursionLevel();
339  }
340 
341  private:
342  Input& in_;
343 };
344 
345 dynamic parseValue(Input& in);
346 std::string parseString(Input& in);
347 dynamic parseNumber(Input& in);
348 
349 dynamic parseObject(Input& in) {
350  DCHECK_EQ(*in, '{');
351  ++in;
352 
353  dynamic ret = dynamic::object;
354 
355  in.skipWhitespace();
356  if (*in == '}') {
357  ++in;
358  return ret;
359  }
360 
361  for (;;) {
362  if (in.getOpts().allow_trailing_comma && *in == '}') {
363  break;
364  }
365  if (*in == '\"') { // string
366  auto key = parseString(in);
367  in.skipWhitespace();
368  in.expect(':');
369  in.skipWhitespace();
370  ret.insert(std::move(key), parseValue(in));
371  } else if (!in.getOpts().allow_non_string_keys) {
372  in.error("expected string for object key name");
373  } else {
374  auto key = parseValue(in);
375  in.skipWhitespace();
376  in.expect(':');
377  in.skipWhitespace();
378  ret.insert(std::move(key), parseValue(in));
379  }
380 
381  in.skipWhitespace();
382  if (*in != ',') {
383  break;
384  }
385  ++in;
386  in.skipWhitespace();
387  }
388  in.expect('}');
389 
390  return ret;
391 }
392 
393 dynamic parseArray(Input& in) {
394  DCHECK_EQ(*in, '[');
395  ++in;
396 
397  dynamic ret = dynamic::array;
398 
399  in.skipWhitespace();
400  if (*in == ']') {
401  ++in;
402  return ret;
403  }
404 
405  for (;;) {
406  if (in.getOpts().allow_trailing_comma && *in == ']') {
407  break;
408  }
409  ret.push_back(parseValue(in));
410  in.skipWhitespace();
411  if (*in != ',') {
412  break;
413  }
414  ++in;
415  in.skipWhitespace();
416  }
417  in.expect(']');
418 
419  return ret;
420 }
421 
422 dynamic parseNumber(Input& in) {
423  bool const negative = (*in == '-');
424  if (negative && in.consume("-Infinity")) {
425  if (in.getOpts().parse_numbers_as_strings) {
426  return "-Infinity";
427  } else {
428  return -std::numeric_limits<double>::infinity();
429  }
430  }
431 
432  auto integral = in.skipMinusAndDigits();
433  if (negative && integral.size() < 2) {
434  in.error("expected digits after `-'");
435  }
436 
437  auto const wasE = *in == 'e' || *in == 'E';
438 
439  constexpr const char* maxInt = "9223372036854775807";
440  constexpr const char* minInt = "-9223372036854775808";
441  constexpr auto maxIntLen = constexpr_strlen(maxInt);
442  constexpr auto minIntLen = constexpr_strlen(minInt);
443 
444  if (*in != '.' && !wasE && in.getOpts().parse_numbers_as_strings) {
445  return integral;
446  }
447 
448  if (*in != '.' && !wasE) {
449  if (LIKELY(!in.getOpts().double_fallback || integral.size() < maxIntLen) ||
450  (!negative && integral.size() == maxIntLen && integral <= maxInt) ||
451  (negative && integral.size() == minIntLen && integral <= minInt)) {
452  auto val = to<int64_t>(integral);
453  in.skipWhitespace();
454  return val;
455  } else {
456  auto val = to<double>(integral);
457  in.skipWhitespace();
458  return val;
459  }
460  }
461 
462  auto end = !wasE ? (++in, in.skipDigits().end()) : in.begin();
463  if (*in == 'e' || *in == 'E') {
464  ++in;
465  if (*in == '+' || *in == '-') {
466  ++in;
467  }
468  auto expPart = in.skipDigits();
469  end = expPart.end();
470  }
471  auto fullNum = range(integral.begin(), end);
472  if (in.getOpts().parse_numbers_as_strings) {
473  return fullNum;
474  }
475  auto val = to<double>(fullNum);
476  return val;
477 }
478 
479 std::string decodeUnicodeEscape(Input& in) {
480  auto hexVal = [&](int c) -> uint16_t {
481  // clang-format off
482  return uint16_t(
483  c >= '0' && c <= '9' ? c - '0' :
484  c >= 'a' && c <= 'f' ? c - 'a' + 10 :
485  c >= 'A' && c <= 'F' ? c - 'A' + 10 :
486  (in.error("invalid hex digit"), 0));
487  // clang-format on
488  };
489 
490  auto readHex = [&]() -> uint16_t {
491  if (in.size() < 4) {
492  in.error("expected 4 hex digits");
493  }
494 
495  uint16_t ret = uint16_t(hexVal(*in) * 4096);
496  ++in;
497  ret += hexVal(*in) * 256;
498  ++in;
499  ret += hexVal(*in) * 16;
500  ++in;
501  ret += hexVal(*in);
502  ++in;
503  return ret;
504  };
505 
506  /*
507  * If the value encoded is in the surrogate pair range, we need to
508  * make sure there is another escape that we can use also.
509  */
510  uint32_t codePoint = readHex();
511  if (codePoint >= 0xd800 && codePoint <= 0xdbff) {
512  if (!in.consume("\\u")) {
513  in.error(
514  "expected another unicode escape for second half of "
515  "surrogate pair");
516  }
517  uint16_t second = readHex();
518  if (second >= 0xdc00 && second <= 0xdfff) {
519  codePoint = 0x10000 + ((codePoint & 0x3ff) << 10) + (second & 0x3ff);
520  } else {
521  in.error("second character in surrogate pair is invalid");
522  }
523  } else if (codePoint >= 0xdc00 && codePoint <= 0xdfff) {
524  in.error("invalid unicode code point (in range [0xdc00,0xdfff])");
525  }
526 
527  return codePointToUtf8(codePoint);
528 }
529 
530 std::string parseString(Input& in) {
531  DCHECK_EQ(*in, '\"');
532  ++in;
533 
534  std::string ret;
535  for (;;) {
536  auto range = in.skipWhile([](char c) { return c != '\"' && c != '\\'; });
537  ret.append(range.begin(), range.end());
538 
539  if (*in == '\"') {
540  ++in;
541  break;
542  }
543  if (*in == '\\') {
544  ++in;
545  switch (*in) {
546  // clang-format off
547  case '\"': ret.push_back('\"'); ++in; break;
548  case '\\': ret.push_back('\\'); ++in; break;
549  case '/': ret.push_back('/'); ++in; break;
550  case 'b': ret.push_back('\b'); ++in; break;
551  case 'f': ret.push_back('\f'); ++in; break;
552  case 'n': ret.push_back('\n'); ++in; break;
553  case 'r': ret.push_back('\r'); ++in; break;
554  case 't': ret.push_back('\t'); ++in; break;
555  case 'u': ++in; ret += decodeUnicodeEscape(in); break;
556  // clang-format on
557  default:
558  in.error(
559  to<std::string>("unknown escape ", *in, " in string").c_str());
560  }
561  continue;
562  }
563  if (*in == EOF) {
564  in.error("unterminated string");
565  }
566  if (!*in) {
567  /*
568  * Apparently we're actually supposed to ban all control
569  * characters from strings. This seems unnecessarily
570  * restrictive, so we're only banning zero bytes. (Since the
571  * string is presumed to be UTF-8 encoded it's fine to just
572  * check this way.)
573  */
574  in.error("null byte in string");
575  }
576 
577  ret.push_back(char(*in));
578  ++in;
579  }
580 
581  return ret;
582 }
583 
584 dynamic parseValue(Input& in) {
585  RecursionGuard guard(in);
586 
587  in.skipWhitespace();
588  // clang-format off
589  return
590  *in == '[' ? parseArray(in) :
591  *in == '{' ? parseObject(in) :
592  *in == '\"' ? parseString(in) :
593  (*in == '-' || (*in >= '0' && *in <= '9')) ? parseNumber(in) :
594  in.consume("true") ? true :
595  in.consume("false") ? false :
596  in.consume("null") ? nullptr :
597  in.consume("Infinity") ?
598  (in.getOpts().parse_numbers_as_strings ? (dynamic)"Infinity" :
599  (dynamic)std::numeric_limits<double>::infinity()) :
600  in.consume("NaN") ?
601  (in.getOpts().parse_numbers_as_strings ? (dynamic)"NaN" :
602  (dynamic)std::numeric_limits<double>::quiet_NaN()) :
603  in.error("expected json value");
604  // clang-format on
605 }
606 
607 } // namespace
608 
610 
611 std::array<uint64_t, 2> buildExtraAsciiToEscapeBitmap(StringPiece chars) {
612  std::array<uint64_t, 2> escapes{{0, 0}};
613  for (auto b : ByteRange(chars)) {
614  if (b >= 0x20 && b < 0x80) {
615  escapes[b / 64] |= uint64_t(1) << (b % 64);
616  }
617  }
618  return escapes;
619 }
620 
622  std::string ret;
623  unsigned indentLevel = 0;
624  Printer p(ret, opts.pretty_formatting ? &indentLevel : nullptr, &opts);
625  p(dyn);
626  return ret;
627 }
628 
629 // Fast path to determine the longest prefix that can be left
630 // unescaped in a string of sizeof(T) bytes packed in an integer of
631 // type T.
632 template <bool EnableExtraAsciiEscapes, class T>
634  static_assert(std::is_unsigned<T>::value, "Unsigned integer required");
635  static constexpr T kOnes = ~T() / 255; // 0x...0101
636  static constexpr T kMsbs = kOnes * 0x80; // 0x...8080
637 
638  // Sets the MSB of bytes < b. Precondition: b < 128.
639  auto isLess = [](T w, uint8_t b) {
640  // A byte is < b iff subtracting b underflows, so we check that
641  // the MSB wasn't set before and it's set after the subtraction.
642  return (w - kOnes * b) & ~w & kMsbs;
643  };
644 
645  auto isChar = [&](uint8_t c) {
646  // A byte is == c iff it is 0 if xored with c.
647  return isLess(s ^ (kOnes * c), 1);
648  };
649 
650  // The following masks have the MSB set for each byte of the word
651  // that satisfies the corresponding condition.
652  auto isHigh = s & kMsbs; // >= 128
653  auto isLow = isLess(s, 0x20); // <= 0x1f
654  auto needsEscape = isHigh | isLow | isChar('\\') | isChar('"');
655 
656  if /* constexpr */ (EnableExtraAsciiEscapes) {
657  // Deal with optional bitmap for unicode escapes. Escapes can optionally be
658  // set for ascii characters 32 - 127, so the inner loop may run up to 96
659  // times. However, for the case where 0 or a handful of bits are set,
660  // looping will be minimal through use of findFirstSet.
661  for (size_t i = 0; i < opts.extra_ascii_to_escape_bitmap.size(); ++i) {
662  const auto offset = i * 64;
663  // Clear first 32 characters if this is the first index, since those are
664  // always escaped.
665  auto bitmap = opts.extra_ascii_to_escape_bitmap[i] &
666  (i == 0 ? uint64_t(-1) << 32 : ~0UL);
667  while (bitmap) {
668  auto bit = folly::findFirstSet(bitmap);
669  needsEscape |= isChar(offset + bit - 1);
670  bitmap &= bitmap - 1;
671  }
672  }
673  }
674 
675  if (!needsEscape) {
676  return sizeof(T);
677  }
678 
680  return folly::findFirstSet(needsEscape) / 8 - 1;
681  } else {
682  return sizeof(T) - folly::findLastSet(needsEscape) / 8;
683  }
684 }
685 
686 // Escape a string so that it is legal to print it in JSON text.
687 template <bool EnableExtraAsciiEscapes>
689  StringPiece input,
690  std::string& out,
691  const serialization_opts& opts) {
692  auto hexDigit = [](uint8_t c) -> char {
693  return c < 10 ? c + '0' : c - 10 + 'a';
694  };
695 
696  out.push_back('\"');
697 
698  auto* p = reinterpret_cast<const unsigned char*>(input.begin());
699  auto* q = reinterpret_cast<const unsigned char*>(input.begin());
700  auto* e = reinterpret_cast<const unsigned char*>(input.end());
701 
702  while (p < e) {
703  // Find the longest prefix that does not need escaping, and copy
704  // it literally into the output string.
705  auto firstEsc = p;
706  while (firstEsc < e) {
707  auto avail = e - firstEsc;
708  uint64_t word = 0;
709  if (avail >= 8) {
710  word = folly::loadUnaligned<uint64_t>(firstEsc);
711  } else {
712  word = folly::partialLoadUnaligned<uint64_t>(firstEsc, avail);
713  }
714  auto prefix = firstEscapableInWord<EnableExtraAsciiEscapes>(word, opts);
715  DCHECK_LE(prefix, avail);
716  firstEsc += prefix;
717  if (prefix < 8) {
718  break;
719  }
720  }
721  if (firstEsc > p) {
722  out.append(reinterpret_cast<const char*>(p), firstEsc - p);
723  p = firstEsc;
724  // We can't be in the middle of a multibyte sequence, so we can reset q.
725  q = p;
726  if (p == e) {
727  break;
728  }
729  }
730 
731  // Handle the next byte that may need escaping.
732 
733  // Since non-ascii encoding inherently does utf8 validation
734  // we explicitly validate utf8 only if non-ascii encoding is disabled.
735  if ((opts.validate_utf8 || opts.skip_invalid_utf8) &&
736  !opts.encode_non_ascii) {
737  // To achieve better spatial and temporal coherence
738  // we do utf8 validation progressively along with the
739  // string-escaping instead of two separate passes.
740 
741  // As the encoding progresses, q will stay at or ahead of p.
742  CHECK_GE(q, p);
743 
744  // As p catches up with q, move q forward.
745  if (q == p) {
746  // calling utf8_decode has the side effect of
747  // checking that utf8 encodings are valid
748  char32_t v = utf8ToCodePoint(q, e, opts.skip_invalid_utf8);
749  if (opts.skip_invalid_utf8 && v == U'\ufffd') {
750  out.append(u8"\ufffd");
751  p = q;
752  continue;
753  }
754  }
755  }
756 
757  auto encodeUnicode = opts.encode_non_ascii && (*p & 0x80);
758  if /* constexpr */ (EnableExtraAsciiEscapes) {
759  encodeUnicode = encodeUnicode ||
760  (*p >= 0x20 && *p < 0x80 &&
761  (opts.extra_ascii_to_escape_bitmap[*p / 64] &
762  (uint64_t(1) << (*p % 64))));
763  }
764 
765  if (encodeUnicode) {
766  // note that this if condition captures utf8 chars
767  // with value > 127, so size > 1 byte (or they are whitelisted for
768  // Unicode encoding).
769  // NOTE: char32_t / char16_t are both unsigned.
770  char32_t cp = utf8ToCodePoint(p, e, opts.skip_invalid_utf8);
771  auto writeHex = [&](char16_t v) {
772  char buf[] = "\\u\0\0\0\0";
773  buf[2] = hexDigit((v >> 12) & 0x0f);
774  buf[3] = hexDigit((v >> 8) & 0x0f);
775  buf[4] = hexDigit((v >> 4) & 0x0f);
776  buf[5] = hexDigit(v & 0x0f);
777  out.append(buf, 6);
778  };
779  // From the ECMA-404 The JSON Data Interchange Syntax 2nd Edition Dec 2017
780  if (cp < 0x10000u) {
781  // If the code point is in the Basic Multilingual Plane (U+0000 through
782  // U+FFFF), then it may be represented as a six-character sequence:
783  // a reverse solidus, followed by the lowercase letter u, followed by
784  // four hexadecimal digits that encode the code point.
785  writeHex(static_cast<char16_t>(cp));
786  } else {
787  // To escape a code point that is not in the Basic Multilingual Plane,
788  // the character may be represented as a twelve-character sequence,
789  // encoding the UTF-16 surrogate pair corresponding to the code point.
790  writeHex(static_cast<char16_t>(
791  0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu)));
792  writeHex(static_cast<char16_t>(0xdc00u + ((cp - 0x10000u) & 0x3ffu)));
793  }
794  } else if (*p == '\\' || *p == '\"') {
795  char buf[] = "\\\0";
796  buf[1] = char(*p++);
797  out.append(buf, 2);
798  } else if (*p <= 0x1f) {
799  switch (*p) {
800  // clang-format off
801  case '\b': out.append("\\b"); p++; break;
802  case '\f': out.append("\\f"); p++; break;
803  case '\n': out.append("\\n"); p++; break;
804  case '\r': out.append("\\r"); p++; break;
805  case '\t': out.append("\\t"); p++; break;
806  // clang-format on
807  default:
808  // Note that this if condition captures non readable chars
809  // with value < 32, so size = 1 byte (e.g control chars).
810  char buf[] = "\\u00\0\0";
811  buf[4] = hexDigit(uint8_t((*p & 0xf0) >> 4));
812  buf[5] = hexDigit(uint8_t(*p & 0xf));
813  out.append(buf, 6);
814  p++;
815  }
816  } else {
817  out.push_back(char(*p++));
818  }
819  }
820 
821  out.push_back('\"');
822 }
823 
825  StringPiece input,
826  std::string& out,
827  const serialization_opts& opts) {
828  if (FOLLY_UNLIKELY(
830  opts.extra_ascii_to_escape_bitmap[1])) {
831  escapeStringImpl<true>(input, out, opts);
832  } else {
833  escapeStringImpl<false>(input, out, opts);
834  }
835 }
836 
838  std::string result;
839  enum class State {
840  None,
841  InString,
842  InlineComment,
843  LineComment
844  } state = State::None;
845 
846  for (size_t i = 0; i < jsonC.size(); ++i) {
847  auto s = jsonC.subpiece(i);
848  switch (state) {
849  case State::None:
850  if (s.startsWith("/*")) {
851  state = State::InlineComment;
852  ++i;
853  continue;
854  } else if (s.startsWith("//")) {
855  state = State::LineComment;
856  ++i;
857  continue;
858  } else if (s[0] == '\"') {
859  state = State::InString;
860  }
861  result.push_back(s[0]);
862  break;
863  case State::InString:
864  if (s[0] == '\\') {
865  if (UNLIKELY(s.size() == 1)) {
866  throw std::logic_error("Invalid JSONC: string is not terminated");
867  }
868  result.push_back(s[0]);
869  result.push_back(s[1]);
870  ++i;
871  continue;
872  } else if (s[0] == '\"') {
873  state = State::None;
874  }
875  result.push_back(s[0]);
876  break;
877  case State::InlineComment:
878  if (s.startsWith("*/")) {
879  state = State::None;
880  ++i;
881  }
882  break;
883  case State::LineComment:
884  if (s[0] == '\n') {
885  // skip the line break. It doesn't matter.
886  state = State::None;
887  }
888  break;
889  default:
890  throw std::logic_error("Unknown comment state");
891  }
892  }
893  return result;
894 }
895 
896 } // namespace json
897 
899 
901  return parseJson(range, json::serialization_opts());
902 }
903 
905  json::Input in(range, &opts);
906 
907  auto ret = parseValue(in);
908  in.skipWhitespace();
909  if (in.size() && *in != '\0') {
910  in.error("parsing didn't consume all input");
911  }
912  return ret;
913 }
914 
917 }
918 
921  opts.pretty_formatting = true;
922  return json::serialize(dyn, opts);
923 }
924 
926 // dynamic::print_as_pseudo_json() is implemented here for header
927 // ordering reasons (most of the dynamic implementation is in
928 // dynamic-inl.h, which we don't want to include json.h).
929 
930 void dynamic::print_as_pseudo_json(std::ostream& out) const {
932  opts.allow_non_string_keys = true;
933  opts.allow_nan_inf = true;
934  out << json::serialize(*this, opts);
935 }
936 
937 void PrintTo(const dynamic& dyn, std::ostream* os) {
939  opts.allow_nan_inf = true;
940  opts.allow_non_string_keys = true;
941  opts.pretty_formatting = true;
942  opts.sort_keys = true;
943  *os << json::serialize(dyn, opts);
944 }
945 
947 
948 } // namespace folly
static ObjectMaker object()
Definition: dynamic-inl.h:240
auto v
int current_
Definition: json.cpp:327
dynamic parseJson(StringPiece range)
Definition: json.cpp:900
char b
BitIterator< BaseIter > findFirstSet(BitIterator< BaseIter >, BitIterator< BaseIter >)
Definition: BitIterator.h:170
context
Definition: CMakeCache.txt:563
unsigned lineNum_
Definition: json.cpp:326
StringPiece skipWhitespace(StringPiece sp)
Definition: String.h:577
constexpr detail::Map< Move > move
Definition: Base-inl.h:2567
double asDouble() const
Definition: dynamic-inl.h:521
#define LIKELY(x)
Definition: Likely.h:47
STL namespace.
A reference wrapper for callable objects.
Definition: Function.h:893
constexpr size_type size() const
Definition: Range.h:431
auto begin(TestAdlIterable &instance)
Definition: ForeachTest.cpp:56
double val
Definition: String.cpp:273
char32_t utf8ToCodePoint(const unsigned char *&p, const unsigned char *const e, bool skipOnError)
Definition: Unicode.cpp:52
#define FOLLY_UNLIKELY(x)
Definition: Likely.h:36
std::unordered_map< std::string, IValidator * > refs
Definition: JSONSchema.cpp:104
std::string stripComments(StringPiece jsonC)
Definition: json.cpp:837
void PrintTo(const dynamic &dyn, std::ostream *os)
Definition: json.cpp:937
folly::std T
constexpr auto kIsLittleEndian
Definition: Portability.h:278
bool asBool() const
Definition: dynamic-inl.h:527
—— Concurrent Priority Queue Implementation ——
Definition: AtomicBitSet.h:29
#define FOLLY_EXPORT
Definition: CPortability.h:133
serialization_opts const & opts_
Definition: json.cpp:187
bool prefix(Cursor &c, uint32_t expected)
requires And< SemiMovable< VN >... > &&SemiMovable< E > auto error(E e)
Definition: error.h:48
#define nullptr
Definition: http_parser.c:41
StringPiece range_
Definition: json.cpp:324
std::string codePointToUtf8(char32_t cp)
Definition: Unicode.cpp:24
constexpr auto size(C const &c) -> decltype(c.size())
Definition: Access.h:45
State
See Core for details.
Definition: Core.h:43
bool empty() const
Definition: dynamic-inl.h:815
std::string asString() const
Definition: dynamic-inl.h:518
auto end(TestAdlIterable &instance)
Definition: ForeachTest.cpp:62
Input & in_
Definition: json.cpp:342
Range subpiece(size_type first, size_type length=npos) const
Definition: Range.h:686
GuardImpl guard(ErrorHandler &&handler)
Definition: Base.h:840
std::string toPrettyJson(dynamic const &dyn)
Definition: json.cpp:919
constexpr Range< Iter > range(Iter first, Iter last)
Definition: Range.h:1114
int64_t asInt() const
Definition: dynamic-inl.h:524
constexpr unsigned int findLastSet(T const v)
Definition: Bits.h:105
char a
void print_as_pseudo_json(std::ostream &) const
Definition: json.cpp:930
void expect(LineReader &lr, const char *expected)
static const char *const value
Definition: Conv.cpp:50
std::array< uint64_t, 2 > buildExtraAsciiToEscapeBitmap(StringPiece chars)
Definition: json.cpp:611
void toAppend(char value, Tgt *result)
Definition: Conv.h:406
constexpr Iter end() const
Definition: Range.h:455
std::string & out_
Definition: json.cpp:185
constexpr Iter begin() const
Definition: Range.h:452
unsigned *const indentLevel_
Definition: json.cpp:186
void escapeString(StringPiece input, std::string &out, const serialization_opts &opts)
Definition: json.cpp:824
constexpr size_t constexpr_strlen(const Char *s)
Definition: Constexpr.h:57
IterableProxy< const_item_iterator > items() const
Definition: dynamic-inl.h:476
const char * string
Definition: Conv.cpp:212
Range< const unsigned char * > ByteRange
Definition: Range.h:1163
static void array(EmptyArrayTag)
Definition: dynamic-inl.h:233
static set< string > s
unsigned int currentRecursionLevel_
Definition: json.cpp:328
std::array< uint64_t, 2 > extra_ascii_to_escape_bitmap
Definition: json.h:137
Type type() const
Definition: dynamic-inl.h:514
Range< const char * > StringPiece
#define UNLIKELY(x)
Definition: Likely.h:48
std::string serialize(dynamic const &dyn, serialization_opts const &opts)
Definition: json.cpp:621
const_iterator begin() const
Definition: dynamic-inl.h:432
std::string toJson(dynamic const &dyn)
Definition: json.cpp:915
char c
void escapeStringImpl(StringPiece input, std::string &out, const serialization_opts &opts)
Definition: json.cpp:688
state
Definition: http_parser.c:272
const_iterator end() const
Definition: dynamic-inl.h:435
bool starts_with(const path &pth, const path &prefix)
Definition: FsUtil.cpp:45
def next(obj)
Definition: ast.py:58
size_t firstEscapableInWord(T s, const serialization_opts &opts)
Definition: json.cpp:633