proxygen
folly::json Namespace Reference

Classes

struct  serialization_opts
 

Functions

std::array< uint64_t, 2 > buildExtraAsciiToEscapeBitmap (StringPiece chars)
 
std::string serialize (dynamic const &dyn, serialization_opts const &opts)
 
template<bool EnableExtraAsciiEscapes, class T >
size_t firstEscapableInWord (T s, const serialization_opts &opts)
 
template<bool EnableExtraAsciiEscapes>
void escapeStringImpl (StringPiece input, std::string &out, const serialization_opts &opts)
 
void escapeString (StringPiece input, std::string &out, const serialization_opts &opts)
 
std::string stripComments (StringPiece jsonC)
 

Function Documentation

std::array< uint64_t, 2 > folly::json::buildExtraAsciiToEscapeBitmap ( StringPiece  chars)

Definition at line 611 of file json.cpp.

References b, and uint64_t.

Referenced by BENCHMARK(), and TEST().

611  {
612  std::array<uint64_t, 2> escapes{{0, 0}};
613  for (auto b : ByteRange(chars)) {
614  if (b >= 0x20 && b < 0x80) {
615  escapes[b / 64] |= uint64_t(1) << (b % 64);
616  }
617  }
618  return escapes;
619 }
char b
Range< const unsigned char * > ByteRange
Definition: Range.h:1163
void folly::json::escapeString ( StringPiece  input,
std::string &  out,
const serialization_opts &  opts 
)

Definition at line 824 of file json.cpp.

References folly::json::serialization_opts::extra_ascii_to_escape_bitmap, and FOLLY_UNLIKELY.

827  {
828  if (FOLLY_UNLIKELY(
829  opts.extra_ascii_to_escape_bitmap[0] ||
830  opts.extra_ascii_to_escape_bitmap[1])) {
831  escapeStringImpl<true>(input, out, opts);
832  } else {
833  escapeStringImpl<false>(input, out, opts);
834  }
835 }
#define FOLLY_UNLIKELY(x)
Definition: Likely.h:36
template<bool EnableExtraAsciiEscapes>
void folly::json::escapeStringImpl ( StringPiece  input,
std::string &  out,
const serialization_opts &  opts 
)

Definition at line 688 of file json.cpp.

References folly::Range< Iter >::begin(), c, folly::json::serialization_opts::encode_non_ascii, folly::Range< Iter >::end(), folly::json::serialization_opts::extra_ascii_to_escape_bitmap, prefix(), folly::json::serialization_opts::skip_invalid_utf8, uint64_t, uint8_t, folly::utf8ToCodePoint(), v, and folly::json::serialization_opts::validate_utf8.

691  {
692  auto hexDigit = [](uint8_t c) -> char {
693  return c < 10 ? c + '0' : c - 10 + 'a';
694  };
695 
696  out.push_back('\"');
697 
698  auto* p = reinterpret_cast<const unsigned char*>(input.begin());
699  auto* q = reinterpret_cast<const unsigned char*>(input.begin());
700  auto* e = reinterpret_cast<const unsigned char*>(input.end());
701 
702  while (p < e) {
703  // Find the longest prefix that does not need escaping, and copy
704  // it literally into the output string.
705  auto firstEsc = p;
706  while (firstEsc < e) {
707  auto avail = e - firstEsc;
708  uint64_t word = 0;
709  if (avail >= 8) {
710  word = folly::loadUnaligned<uint64_t>(firstEsc);
711  } else {
712  word = folly::partialLoadUnaligned<uint64_t>(firstEsc, avail);
713  }
714  auto prefix = firstEscapableInWord<EnableExtraAsciiEscapes>(word, opts);
715  DCHECK_LE(prefix, avail);
716  firstEsc += prefix;
717  if (prefix < 8) {
718  break;
719  }
720  }
721  if (firstEsc > p) {
722  out.append(reinterpret_cast<const char*>(p), firstEsc - p);
723  p = firstEsc;
724  // We can't be in the middle of a multibyte sequence, so we can reset q.
725  q = p;
726  if (p == e) {
727  break;
728  }
729  }
730 
731  // Handle the next byte that may need escaping.
732 
733  // Since non-ascii encoding inherently does utf8 validation
734  // we explicitly validate utf8 only if non-ascii encoding is disabled.
735  if ((opts.validate_utf8 || opts.skip_invalid_utf8) &&
736  !opts.encode_non_ascii) {
737  // To achieve better spatial and temporal coherence
738  // we do utf8 validation progressively along with the
739  // string-escaping instead of two separate passes.
740 
741  // As the encoding progresses, q will stay at or ahead of p.
742  CHECK_GE(q, p);
743 
744  // As p catches up with q, move q forward.
745  if (q == p) {
746  // calling utf8_decode has the side effect of
747  // checking that utf8 encodings are valid
748  char32_t v = utf8ToCodePoint(q, e, opts.skip_invalid_utf8);
749  if (opts.skip_invalid_utf8 && v == U'\ufffd') {
750  out.append(u8"\ufffd");
751  p = q;
752  continue;
753  }
754  }
755  }
756 
757  auto encodeUnicode = opts.encode_non_ascii && (*p & 0x80);
758  if /* constexpr */ (EnableExtraAsciiEscapes) {
759  encodeUnicode = encodeUnicode ||
760  (*p >= 0x20 && *p < 0x80 &&
761  (opts.extra_ascii_to_escape_bitmap[*p / 64] &
762  (uint64_t(1) << (*p % 64))));
763  }
764 
765  if (encodeUnicode) {
766  // note that this if condition captures utf8 chars
767  // with value > 127, so size > 1 byte (or they are whitelisted for
768  // Unicode encoding).
769  // NOTE: char32_t / char16_t are both unsigned.
770  char32_t cp = utf8ToCodePoint(p, e, opts.skip_invalid_utf8);
771  auto writeHex = [&](char16_t v) {
772  char buf[] = "\\u\0\0\0\0";
773  buf[2] = hexDigit((v >> 12) & 0x0f);
774  buf[3] = hexDigit((v >> 8) & 0x0f);
775  buf[4] = hexDigit((v >> 4) & 0x0f);
776  buf[5] = hexDigit(v & 0x0f);
777  out.append(buf, 6);
778  };
779  // From the ECMA-404 The JSON Data Interchange Syntax 2nd Edition Dec 2017
780  if (cp < 0x10000u) {
781  // If the code point is in the Basic Multilingual Plane (U+0000 through
782  // U+FFFF), then it may be represented as a six-character sequence:
783  // a reverse solidus, followed by the lowercase letter u, followed by
784  // four hexadecimal digits that encode the code point.
785  writeHex(static_cast<char16_t>(cp));
786  } else {
787  // To escape a code point that is not in the Basic Multilingual Plane,
788  // the character may be represented as a twelve-character sequence,
789  // encoding the UTF-16 surrogate pair corresponding to the code point.
790  writeHex(static_cast<char16_t>(
791  0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu)));
792  writeHex(static_cast<char16_t>(0xdc00u + ((cp - 0x10000u) & 0x3ffu)));
793  }
794  } else if (*p == '\\' || *p == '\"') {
795  char buf[] = "\\\0";
796  buf[1] = char(*p++);
797  out.append(buf, 2);
798  } else if (*p <= 0x1f) {
799  switch (*p) {
800  // clang-format off
801  case '\b': out.append("\\b"); p++; break;
802  case '\f': out.append("\\f"); p++; break;
803  case '\n': out.append("\\n"); p++; break;
804  case '\r': out.append("\\r"); p++; break;
805  case '\t': out.append("\\t"); p++; break;
806  // clang-format on
807  default:
808  // Note that this if condition captures non readable chars
809  // with value < 32, so size = 1 byte (e.g control chars).
810  char buf[] = "\\u00\0\0";
811  buf[4] = hexDigit(uint8_t((*p & 0xf0) >> 4));
812  buf[5] = hexDigit(uint8_t(*p & 0xf));
813  out.append(buf, 6);
814  p++;
815  }
816  } else {
817  out.push_back(char(*p++));
818  }
819  }
820 
821  out.push_back('\"');
822 }
auto v
char32_t utf8ToCodePoint(const unsigned char *&p, const unsigned char *const e, bool skipOnError)
Definition: Unicode.cpp:52
bool prefix(Cursor &c, uint32_t expected)
char c
template<bool EnableExtraAsciiEscapes, class T >
size_t folly::json::firstEscapableInWord ( T  s,
const serialization_opts &  opts 
)

Definition at line 633 of file json.cpp.

References b, c, folly::json::serialization_opts::extra_ascii_to_escape_bitmap, folly::findFirstSet(), folly::findLastSet(), i, folly::kIsLittleEndian, folly::T, uint64_t, uint8_t, and value.

633  {
634  static_assert(std::is_unsigned<T>::value, "Unsigned integer required");
635  static constexpr T kOnes = ~T() / 255; // 0x...0101
636  static constexpr T kMsbs = kOnes * 0x80; // 0x...8080
637 
638  // Sets the MSB of bytes < b. Precondition: b < 128.
639  auto isLess = [](T w, uint8_t b) {
640  // A byte is < b iff subtracting b underflows, so we check that
641  // the MSB wasn't set before and it's set after the subtraction.
642  return (w - kOnes * b) & ~w & kMsbs;
643  };
644 
645  auto isChar = [&](uint8_t c) {
646  // A byte is == c iff it is 0 if xored with c.
647  return isLess(s ^ (kOnes * c), 1);
648  };
649 
650  // The following masks have the MSB set for each byte of the word
651  // that satisfies the corresponding condition.
652  auto isHigh = s & kMsbs; // >= 128
653  auto isLow = isLess(s, 0x20); // <= 0x1f
654  auto needsEscape = isHigh | isLow | isChar('\\') | isChar('"');
655 
656  if /* constexpr */ (EnableExtraAsciiEscapes) {
657  // Deal with optional bitmap for unicode escapes. Escapes can optionally be
658  // set for ascii characters 32 - 127, so the inner loop may run up to 96
659  // times. However, for the case where 0 or a handful of bits are set,
660  // looping will be minimal through use of findFirstSet.
661  for (size_t i = 0; i < opts.extra_ascii_to_escape_bitmap.size(); ++i) {
662  const auto offset = i * 64;
663  // Clear first 32 characters if this is the first index, since those are
664  // always escaped.
665  auto bitmap = opts.extra_ascii_to_escape_bitmap[i] &
666  (i == 0 ? uint64_t(-1) << 32 : ~0UL);
667  while (bitmap) {
668  auto bit = folly::findFirstSet(bitmap);
669  needsEscape |= isChar(offset + bit - 1);
670  bitmap &= bitmap - 1;
671  }
672  }
673  }
674 
675  if (!needsEscape) {
676  return sizeof(T);
677  }
678 
679  if (folly::kIsLittleEndian) {
680  return folly::findFirstSet(needsEscape) / 8 - 1;
681  } else {
682  return sizeof(T) - folly::findLastSet(needsEscape) / 8;
683  }
684 }
#define T(v)
Definition: http_parser.c:233
char b
BitIterator< BaseIter > findFirstSet(BitIterator< BaseIter >, BitIterator< BaseIter >)
Definition: BitIterator.h:170
constexpr auto kIsLittleEndian
Definition: Portability.h:278
constexpr unsigned int findLastSet(T const v)
Definition: Bits.h:105
static const char *const value
Definition: Conv.cpp:50
static set< string > s
char c
std::string folly::json::serialize ( dynamic const &  dyn,
serialization_opts const &  opts 
)

Definition at line 621 of file json.cpp.

References folly::json::serialization_opts::pretty_formatting, and string.

Referenced by BENCHMARK(), wangle::FilePersistenceLayer< K, V >::persist(), folly::dynamic::print_as_pseudo_json(), folly::PrintTo(), TEST(), folly::toJson(), folly::toPrettyJson(), ClientSerializeHandler::write(), and ServerSerializeHandler::write().

621  {
622  std::string ret;
623  unsigned indentLevel = 0;
624  Printer p(ret, opts.pretty_formatting ? &indentLevel : nullptr, &opts);
625  p(dyn);
626  return ret;
627 }
const char * string
Definition: Conv.cpp:212
std::string folly::json::stripComments ( StringPiece  jsonC)

Definition at line 837 of file json.cpp.

References i, s, folly::Range< Iter >::size(), string, folly::Range< Iter >::subpiece(), and UNLIKELY.

Referenced by folly::parseLogConfigJson(), and TEST().

837  {
838  std::string result;
839  enum class State {
840  None,
841  InString,
842  InlineComment,
843  LineComment
844  } state = State::None;
845 
846  for (size_t i = 0; i < jsonC.size(); ++i) {
847  auto s = jsonC.subpiece(i);
848  switch (state) {
849  case State::None:
850  if (s.startsWith("/*")) {
851  state = State::InlineComment;
852  ++i;
853  continue;
854  } else if (s.startsWith("//")) {
855  state = State::LineComment;
856  ++i;
857  continue;
858  } else if (s[0] == '\"') {
859  state = State::InString;
860  }
861  result.push_back(s[0]);
862  break;
863  case State::InString:
864  if (s[0] == '\\') {
865  if (UNLIKELY(s.size() == 1)) {
866  throw std::logic_error("Invalid JSONC: string is not terminated");
867  }
868  result.push_back(s[0]);
869  result.push_back(s[1]);
870  ++i;
871  continue;
872  } else if (s[0] == '\"') {
873  state = State::None;
874  }
875  result.push_back(s[0]);
876  break;
877  case State::InlineComment:
878  if (s.startsWith("*/")) {
879  state = State::None;
880  ++i;
881  }
882  break;
883  case State::LineComment:
884  if (s[0] == '\n') {
885  // skip the line break. It doesn't matter.
886  state = State::None;
887  }
888  break;
889  default:
890  throw std::logic_error("Unknown comment state");
891  }
892  }
893  return result;
894 }
State
See Core for details.
Definition: Core.h:43
const char * string
Definition: Conv.cpp:212
static set< string > s
#define UNLIKELY(x)
Definition: Likely.h:48
state
Definition: http_parser.c:272