/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "builtin/intl/LocaleNegotiation.h"

#include "mozilla/Assertions.h"
#include "mozilla/EnumeratedRange.h"
#include "mozilla/intl/Calendar.h"
#include "mozilla/intl/Collator.h"
#include "mozilla/intl/Locale.h"
#include "mozilla/intl/NumberingSystem.h"
#include "mozilla/Maybe.h"
#include "mozilla/Range.h"

#include
#include
#include
#include
#include

#include "builtin/Array.h"
#include "builtin/intl/CommonFunctions.h"
#include "builtin/intl/FormatBuffer.h"
#include "builtin/intl/NumberingSystemsGenerated.h"
#include "builtin/intl/ParameterNegotiation.h"
#include "builtin/intl/SharedIntlData.h"
#include "builtin/intl/StringAsciiChars.h"
#include "js/Conversions.h"
#include "js/Result.h"
#include "util/StringBuilder.h"
#include "vm/ArrayObject.h"
#include "vm/GlobalObject.h"
#include "vm/JSContext.h"
#include "vm/PlainObject.h"
#include "vm/Realm.h"
#include "vm/StringType.h"

#include "vm/NativeObject-inl.h"
#include "vm/ObjectOperations-inl.h"

using namespace js;
using namespace js::intl;

/**
 * Return the table mapping each UnicodeExtensionKey to its two-letter Unicode
 * extension key name.
 */
static constexpr auto UnicodeExtensionKeyNames() {
  mozilla::EnumeratedArray names;

  names[UnicodeExtensionKey::Calendar] = "ca";
  names[UnicodeExtensionKey::Collation] = "co";
  names[UnicodeExtensionKey::CollationCaseFirst] = "kf";
  names[UnicodeExtensionKey::CollationNumeric] = "kn";
  names[UnicodeExtensionKey::HourCycle] = "hc";
  names[UnicodeExtensionKey::NumberingSystem] = "nu";

  return names;
}

/**
 * Map the two-character |subtag| to its UnicodeExtensionKey. Returns Nothing
 * when |subtag| matches none of the names in UnicodeExtensionKeyNames().
 */
template
static mozilla::Maybe ToUnicodeExtensionKey(
    std::basic_string_view subtag) {
  MOZ_ASSERT(subtag.length() == 2);

  static constexpr auto names = UnicodeExtensionKeyNames();

  for (auto key : mozilla::MakeInclusiveEnumeratedRange(
           mozilla::MaxEnumValue::value)) {
    auto name = names[key];
    if (name[0] == subtag[0] && name[1] == subtag[1]) {
      return mozilla::Some(key);
    }
  }
  return mozilla::Nothing();
}

/**
 * Debug-only check that |locale| is ASCII, parses as a language tag, carries
 * no Unicode extension, and is already in canonical form.
 *
 * In non-DEBUG builds this is a no-op returning true. In DEBUG builds it
 * returns false, with an error reported on |cx|, only when initializing the
 * character buffer, parsing, canonicalizing, or stringifying fails.
 */
static bool AssertCanonicalLocaleWithoutUnicodeExtension(
    JSContext* cx, Handle locale) {
#ifdef DEBUG
  MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only");

  // |locale| is a structurally valid language tag.
  mozilla::intl::Locale tag;

  using ParserError = mozilla::intl::LocaleParser::ParserError;
  mozilla::Result parse_result = Ok();
  {
    StringAsciiChars chars(locale);
    if (!chars.init(cx)) {
      return false;
    }

    parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag);
  }

  if (parse_result.isErr()) {
    MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory,
               "locale is a structurally valid language tag");

    ReportInternalError(cx);
    return false;
  }

  MOZ_ASSERT(!tag.GetUnicodeExtension(),
             "locale must contain no Unicode extensions");

  if (auto result = tag.Canonicalize(); result.isErr()) {
    MOZ_ASSERT(result.unwrapErr() !=
               mozilla::intl::Locale::CanonicalizationError::DuplicateVariant);
    ReportInternalError(cx);
    return false;
  }

  FormatBuffer buffer(cx);
  if (auto result = tag.ToString(buffer); result.isErr()) {
    ReportInternalError(cx, result.unwrapErr());
    return false;
  }

  MOZ_ASSERT(StringEqualsAscii(locale, buffer.data(), buffer.length()),
             "locale is a canonicalized language tag");
#endif
  return true;
}

/**
 * Return true if |locale| equals |otherLocale|, or if |locale| is a proper
 * prefix of |otherLocale| that is immediately followed by a '-' separator.
 */
static bool SameOrParentLocale(const JSLinearString* locale,
                               const JSLinearString* otherLocale) {
  // Return true if |locale| is the same locale as |otherLocale|.
  if (locale->length() == otherLocale->length()) {
    return EqualStrings(locale, otherLocale);
  }

  // Also return true if |locale| is the parent locale of |otherLocale|.
  if (locale->length() < otherLocale->length()) {
    return HasSubstringAt(otherLocale, locale, 0) &&
           otherLocale->latin1OrTwoByteChar(locale->length()) == '-';
  }

  return false;
}

/**
 * 9.2.2 BestAvailableLocale ( availableLocales, locale )
 *
 * Compares a BCP 47 language tag against the locales in availableLocales and
 * returns the best available match. Uses the fallback mechanism of RFC 4647,
 * section 3.4.
 *
 * Returns nullptr (as a successful result) when no prefix of |locale| is
 * available, and an error result when an error was reported on |cx|.
 *
 * Spec: ECMAScript Internationalization API Specification, 9.2.2.
 * Spec: RFC 4647, section 3.4.
 */
static JS::Result BestAvailableLocale(
    JSContext* cx, AvailableLocaleKind availableLocales,
    Handle locale, Handle defaultLocale) {
  // In the spec, [[availableLocales]] is formally a list of all available
  // locales. But in our implementation, it's an *incomplete* list, not
  // necessarily including the default locale (and all locales implied by it,
  // e.g. "de" implied by "de-CH"), if that locale isn't in every
  // [[availableLocales]] list (because that locale is supported through
  // fallback, e.g. "de-CH" supported through "de").
  //
  // If we're considering the default locale, augment the spec loop with
  // additional checks to also test whether the current prefix is a prefix of
  // the default locale.

  auto& sharedIntlData = cx->runtime()->sharedIntlData.ref();

  // Returns the index of the last '-' in |chars|, or -1 if there is none.
  auto findLast = [](const auto* chars, size_t length) {
    auto rbegin = std::make_reverse_iterator(chars + length);
    auto rend = std::make_reverse_iterator(chars);
    auto p = std::find(rbegin, rend, '-');

    // |dist(chars, p.base())| is equal to |dist(p, rend)|, pick whichever you
    // find easier to reason about when using reverse iterators.
    ptrdiff_t r = std::distance(chars, p.base());
    MOZ_ASSERT(r == std::distance(p, rend));

    // But always subtract one to convert from the reverse iterator result to
    // the corresponding forward iterator value, because reverse iterators
    // point to one element past the forward iterator value.
    return r - 1;
  };

  if (!AssertCanonicalLocaleWithoutUnicodeExtension(cx, locale)) {
    return cx->alreadyReportedError();
  }

  // Step 1.
  Rooted candidate(cx, locale);

  // Step 2.
  while (true) {
    // Step 2.a.
    bool supported = false;
    if (!sharedIntlData.isAvailableLocale(cx, availableLocales, candidate,
                                          &supported)) {
      return cx->alreadyReportedError();
    }
    if (supported) {
      return candidate.get();
    }

    if (defaultLocale && SameOrParentLocale(candidate, defaultLocale)) {
      return candidate.get();
    }

    // Step 2.b.
    ptrdiff_t pos;
    if (candidate->hasLatin1Chars()) {
      JS::AutoCheckCannotGC nogc;
      pos = findLast(candidate->latin1Chars(nogc), candidate->length());
    } else {
      JS::AutoCheckCannotGC nogc;
      pos = findLast(candidate->twoByteChars(nogc), candidate->length());
    }

    if (pos < 0) {
      return nullptr;
    }

    // Step 2.c.
    //
    // If the subtag that now ends the candidate is a single character, drop
    // it (and its separator) as well.
    size_t length = size_t(pos);
    if (length >= 2 && candidate->latin1OrTwoByteChar(length - 2) == '-') {
      length -= 2;
    }

    // Step 2.d.
    candidate = NewDependentString(cx, candidate, 0, length);
    if (!candidate) {
      return cx->alreadyReportedError();
    }
  }
}

// 9.2.2 BestAvailableLocale ( availableLocales, locale )
//
// Carries an additional third argument in our implementation to provide the
// default locale. See the doc-comment in the header file.
//
// On success |result| holds the best available locale, or nullptr when none
// was found.
bool js::intl::BestAvailableLocale(JSContext* cx,
                                   AvailableLocaleKind availableLocales,
                                   Handle locale,
                                   Handle defaultLocale,
                                   MutableHandle result) {
  JSLinearString* res;
  JS_TRY_VAR_OR_RETURN_FALSE(
      cx, res,
      BestAvailableLocale(cx, availableLocales, locale, defaultLocale));

  // |res| is already nullptr when no locale matched, so no branch is needed.
  result.set(res);
  return true;
}

/**
 * Return the length of the leading part of |locale| that precedes the first
 * single-character subtag (not counting the '-' in front of that subtag), or
 * the full length when |locale| contains no single-character subtag.
 */
template
static size_t BaseNameLength(mozilla::Range locale) {
  // Search for the start of the first singleton subtag.
  for (size_t i = 0; i < locale.length(); i++) {
    if (locale[i] == '-') {
      MOZ_RELEASE_ASSERT(i + 2 < locale.length(), "invalid locale");
      if (locale[i + 2] == '-') {
        return i;
      }
    }
  }
  return locale.length();
}

/**
 * BaseNameLength for a linear string; dispatches on the string's character
 * encoding.
 */
static size_t BaseNameLength(JSLinearString* locale) {
  JS::AutoCheckCannotGC nogc;
  if (locale->hasLatin1Chars()) {
    return BaseNameLength(locale->latin1Range(nogc));
  }
  return BaseNameLength(locale->twoByteRange(nogc));
}

/**
 * Returns the subset of requestedLocales for which availableLocales has a
 * matching (possibly fallback) locale. Locales appear in the same order in the
 * returned list as in the input list.
 *
 * Spec: ECMAScript Internationalization API Specification, 9.2.7.
 * Spec: ECMAScript Internationalization API Specification, 9.2.8.
 */
static bool LookupSupportedLocales(
    JSContext* cx, AvailableLocaleKind availableLocales,
    Handle requestedLocales,
    MutableHandle supportedLocales) {
  // Step 1.
  MOZ_ASSERT(supportedLocales.empty());

  Rooted defaultLocale(
      cx, cx->global()->globalIntlData().defaultLocale(cx));
  if (!defaultLocale) {
    return false;
  }

  // Step 2.
  Rooted noExtensionsLocale(cx);
  Rooted availableLocale(cx);
  for (size_t i = 0; i < requestedLocales.length(); i++) {
    auto locale = requestedLocales[i];

    // Step 2.a.
    //
    // Use the base name to ignore any extension sequences.
    noExtensionsLocale =
        NewDependentString(cx, locale, 0, BaseNameLength(locale));
    if (!noExtensionsLocale) {
      return false;
    }

    // Step 2.b.
    //
    // Store into the rooted |availableLocale| declared above instead of
    // shadowing it with an unrooted local.
    JS_TRY_VAR_OR_RETURN_FALSE(
        cx, availableLocale,
        BestAvailableLocale(cx, availableLocales, noExtensionsLocale,
                            defaultLocale));

    // Step 2.c.
    //
    // Note that the originally requested locale, including any extensions,
    // is appended, not the matched available locale.
    if (availableLocale) {
      if (!supportedLocales.append(locale)) {
        return false;
      }
    }
  }

  // Step 3.
  return true;
}

/**
 * Returns the subset of requestedLocales for which availableLocales has a
 * matching (possibly fallback) locale. Locales appear in the same order in the
 * returned list as in the input list.
 *
 * Spec: ECMAScript Internationalization API Specification, 9.2.9.
*/
static bool SupportedLocales(JSContext* cx,
                             AvailableLocaleKind availableLocales,
                             Handle requestedLocales,
                             Handle options,
                             MutableHandle supportedLocales) {
  // Step 1.
  if (!options.isUndefined()) {
    // Step 1.a.
    Rooted obj(cx, ToObject(cx, options));
    if (!obj) {
      return false;
    }

    // Step 1.b.
    //
    // The option is read so that an invalid value is reported; the parsed
    // value itself is not used below.
    LocaleMatcher localeMatcher;
    if (!GetLocaleMatcherOption(cx, obj, JSMSG_INVALID_LOCALE_MATCHER,
                                &localeMatcher)) {
      return false;
    }
  }

  // Steps 2-5.
  //
  // We don't yet support anything better than the lookup matcher.
  return LookupSupportedLocales(cx, availableLocales, requestedLocales,
                                supportedLocales);
}

/**
 * Returns the start and end indices of a "Unicode locale extension sequence",
 * which the specification defines as: "any substring of a language tag that
 * starts with a separator '-' and the singleton 'u' and includes the maximum
 * sequence of following non-singleton subtags and their preceding '-'
 * separators."
 *
 * Alternatively, this may be defined as: the components of a language tag that
 * match the `unicode_locale_extensions` production in UTS 35.
 *
 * The returned pair is |{start, end}| where |start| is the index of the
 * leading '-' and |end| is one past the last character of the sequence. A
 * value-initialized pair is returned when no sequence is found.
 *
 * Spec: ECMAScript Internationalization API Specification, 6.2.1.
 */
template
static std::pair FindUnicodeExtensionSequence(
    mozilla::Range locale) {
  // Return early if the locale string is too small to hold any Unicode
  // extension sequences. (This is the common case, so handle it first.)
  //
  // Smallest language subtag has two characters.
  // Smallest Unicode extension sequence has five characters
  if (locale.length() < (2 + 5)) {
    return {};
  }

  // Search for the start of a Unicode extension sequence.
  //
  // Begin searching after the smallest possible language subtag, namely
  // |2alpha|. End searching once the remaining characters can't fit the
  // smallest possible Unicode extension sequence, namely |"-u-" 2alphanum|.
  // Note the reduced end-limit means indexing inside the loop is always
  // in-range.
  //
  // |start == 0| doubles as the "not found" marker, which is unambiguous
  // because the search begins at index 2.
  size_t start = 0;
  for (size_t i = 2; i <= locale.length() - 5; i++) {
    // Search for "-u-" marking the start of a Unicode extension sequence.
    if (locale[i] == '-' && locale[i + 1] == 'u' && locale[i + 2] == '-') {
      start = i;
      break;
    }

    // And search for "-x-" marking the start of any privateuse component to
    // handle the case when "-u-" was only found within a privateuse subtag.
    if (locale[i] == '-' && locale[i + 1] == 'x' && locale[i + 2] == '-') {
      break;
    }
  }

  // Return if no Unicode extension sequence was found.
  if (start == 0) {
    return {};
  }

  // Search for the start of the next singleton or privateuse subtag.
  //
  // Begin searching after the smallest possible Unicode locale extension
  // sequence, namely |"-u-" 2alphanum|. End searching once the remaining
  // characters can't fit the smallest possible privateuse subtag, namely
  // |"-x-" alphanum|. Note the reduced end-limit means indexing inside the loop
  // is always in-range.
  for (size_t i = start + 5; i <= locale.length() - 4; i++) {
    if (locale[i] != '-') {
      continue;
    }
    if (locale[i + 2] == '-') {
      return {start, i};
    }

    // Skip over (i + 1) and (i + 2) because we've just verified they aren't
    // "-", so the next possible delimiter can only be at (i + 3).
    i += 2;
  }

  // If no singleton or privateuse subtag was found, the Unicode extension
  // sequence extends until the end of the string.
  return {start, locale.length()};
}

/**
 * FindUnicodeExtensionSequence for a linear string; dispatches on the
 * string's character encoding.
 */
static auto FindUnicodeExtensionSequence(const JSLinearString* locale) {
  JS::AutoCheckCannotGC nogc;
  if (locale->hasLatin1Chars()) {
    return FindUnicodeExtensionSequence(locale->latin1Range(nogc));
  }
  return FindUnicodeExtensionSequence(locale->twoByteRange(nogc));
}

// Trace both string fields; either may be null.
void js::intl::LookupMatcherResult::trace(JSTracer* trc) {
  TraceNullableRoot(trc, &locale_, "LookupMatcherResult::locale");
  TraceNullableRoot(trc, &extension_, "LookupMatcherResult::extension");
}

/**
 * LookupMatchingLocaleByPrefix ( availableLocales, requestedLocales )
 *
 * Stores in |result| the first requested locale that has an available match,
 * together with its Unicode extension sequence (or nullptr if it has none).
 * When no requested locale matches, |result| is the default locale with a
 * null extension.
 */
bool js::intl::LookupMatcher(JSContext* cx,
                             AvailableLocaleKind availableLocales,
                             Handle locales,
                             MutableHandle result) {
  MOZ_RELEASE_ASSERT(IsPackedArray(locales));

  Rooted defaultLocale(
      cx, cx->global()->globalIntlData().defaultLocale(cx));
  if (!defaultLocale) {
    return false;
  }

  // Step 1. (Not applicable)

  // Step 2.
  Rooted locale(cx);
  Rooted noExtensionsLocale(cx);
  Rooted availableLocale(cx);
  for (size_t i = 0, length = locales->length(); i < length; i++) {
    locale = locales->getDenseElement(i).toString()->ensureLinear(cx);
    if (!locale) {
      return false;
    }

    // Step 2.a.
    //
    // Use the base name to ignore any extension sequences.
    noExtensionsLocale =
        NewDependentString(cx, locale, 0, BaseNameLength(locale));
    if (!noExtensionsLocale) {
      return false;
    }

    // Step 2.b.
    JS_TRY_VAR_OR_RETURN_FALSE(
        cx, availableLocale,
        BestAvailableLocale(cx, availableLocales, noExtensionsLocale,
                            defaultLocale));

    // Step 2.c.
    if (availableLocale) {
      // Step 2.c.i. (Not applicable)

      // Step 2.c.ii.
      //
      // Search for Unicode extension sequences if |locale| contains any
      // extension subtags.
      JSLinearString* extension = nullptr;
      if (locale->length() > noExtensionsLocale->length()) {
        auto [startOfUnicodeExtensions, endOfUnicodeExtensions] =
            FindUnicodeExtensionSequence(locale);

        // Extract the Unicode extension sequence of |locale|.
        //
        // A start index of zero means no sequence was found.
        if (startOfUnicodeExtensions) {
          MOZ_ASSERT(startOfUnicodeExtensions < endOfUnicodeExtensions);
          MOZ_ASSERT(endOfUnicodeExtensions <= locale->length());

          extension = NewDependentString(
              cx, locale, startOfUnicodeExtensions,
              endOfUnicodeExtensions - startOfUnicodeExtensions);
          if (!extension) {
            return false;
          }
        }
      }

      // Step 2.c.iii.
      result.set({availableLocale, extension});
      return true;
    }
  }

  // Steps 3-5.
  result.set({defaultLocale, nullptr});
  return true;
}

// Trace every per-key extension string; entries may be null.
void js::intl::LocaleOptions::trace(JSTracer* trc) {
  for (auto& extension : extensions_) {
    TraceNullableRoot(trc, &extension, "LocaleOptions::extension");
  }
}

/**
 * Build the locale string for this resolved locale: the data locale followed
 * by "-u" and one "-key[-value]" entry for each key in |keywords_|. Returns
 * the data locale unchanged when |keywords_| is empty, and nullptr when
 * appending to the string builder fails.
 */
JSLinearString* js::intl::ResolvedLocale::toLocale(JSContext* cx) const {
  if (keywords_.isEmpty()) {
    return dataLocale_;
  }

  JSStringBuilder sb(cx);
  if (!sb.append(dataLocale_)) {
    return nullptr;
  }
  if (!sb.append("-u")) {
    return nullptr;
  }

  for (auto key : keywords_) {
    static constexpr auto names = UnicodeExtensionKeyNames();

    // Key names are always exactly two characters long.
    if (!sb.append('-') || !sb.append(names[key], 2)) {
      return nullptr;
    }

    auto* extension = extensions_[key];
    MOZ_ASSERT(extension);

    // An empty value or the value "true" is omitted, leaving only the key.
    if (!extension->empty() && !StringEqualsLiteral(extension, "true")) {
      if (!sb.append('-') || !sb.append(extension)) {
        return nullptr;
      }
    }
  }

  return sb.finishString();
}

// Trace the data locale and every per-key extension string; all may be null.
void js::intl::ResolvedLocale::trace(JSTracer* trc) {
  TraceNullableRoot(trc, &dataLocale_, "ResolvedLocale::dataLocale");
  for (auto& extension : extensions_) {
    TraceNullableRoot(trc, &extension, "ResolvedLocale::extension");
  }
}

/**
 * Unicode extension keywords found by UnicodeExtensionComponents.
 */
class UnicodeExtensionKeywords {
  // Start position and length of a Unicode extension keyword.
  using Value = std::pair;

  // Value-initialized, so every entry starts out as |{0, 0}|.
  mozilla::EnumeratedArray keywords{};

 public:
  /**
   * Return `true` if the Unicode extension |key| is present.
   *
   * A start position of zero marks an absent keyword.
   */
  bool has(UnicodeExtensionKey key) const { return keywords[key].first > 0; }

  /**
   * Get the Unicode extension for the argument |key|.
   */
  const auto& get(UnicodeExtensionKey key) const { return keywords[key]; }

  /**
   * Get a mutable reference to the Unicode extension for the argument |key|.
   */
  auto& get(UnicodeExtensionKey key) { return keywords[key]; }
};

/**
 * UnicodeExtensionComponents ( extension )
 *
 * Scans the "-u-..." sequence |extension| and records, for the first
 * occurrence of each recognized key, the start position and length of its
 * value within |extension|. A recorded length of zero means the key has no
 * value subtags.
 */
template
static auto UnicodeExtensionComponents(
    std::basic_string_view extension) {
  // Step 1.
  MOZ_ASSERT(std::all_of(extension.begin(), extension.end(), [](auto ch) {
    return mozilla::IsAscii(ch) && !mozilla::IsAsciiUppercaseAlpha(ch);
  }));

  // Step 2.
  MOZ_ASSERT(extension.length() >= 5);
  MOZ_ASSERT(extension[0] == '-');
  MOZ_ASSERT(extension[1] == 'u');
  MOZ_ASSERT(extension[2] == '-');

  // Step 3. (Not applicable in our implementation.)

  // Step 4.
  UnicodeExtensionKeywords keywords{};

  // Step 5.
  //
  // The key whose value subtags are currently being collected, if any.
  mozilla::Maybe key{};

  // Steps 6-8.
  for (size_t k = 3; k < extension.length();) {
    // Step 8.a.
    size_t e = extension.find('-', k);

    // Step 8.b.
    size_t len = (e == extension.npos ? extension.length() : e) - k;

    // Step 8.c.
    auto subtag = extension.substr(k, len);

    // Steps 8.d-e.
    MOZ_ASSERT(len >= 2);

    // Steps 8.f-i
    if (len == 2) {
      key = ToUnicodeExtensionKey(subtag);
      if (key && !keywords.has(*key)) {
        // Record keyword start position.
        //
        // |k + 3| skips the two-character key and the '-' that follows it.
        keywords.get(*key) = {k + 3, 0};
      } else {
        // Ignore duplicate or irrelevant keywords.
        key = mozilla::Nothing();
      }
    } else if (key) {
      // Update keyword length.
      //
      // Every value subtag after the first also accounts for its '-'.
      auto& keyword = keywords.get(*key);
      if (keyword.second == 0) {
        keyword.second = len;
      } else {
        keyword.second += 1 + len;
      }
    }

    // Step 8.j.
    k = k + len + 1;
  }

  // Step 9.
  return keywords;
}

/**
 * UnicodeExtensionComponents ( extension )
 *
 * Overload for a linear string; dispatches on the string's character
 * encoding.
 */
static auto UnicodeExtensionComponents(const JSLinearString* extension) {
  MOZ_ASSERT(StringIsAscii(extension));

  JS::AutoCheckCannotGC nogc;

  if (extension->hasLatin1Chars()) {
    auto* chars = extension->latin1Chars(nogc);
    std::string_view sv{reinterpret_cast(chars), extension->length()};
    return UnicodeExtensionComponents(sv);
  }

  auto* chars = extension->twoByteChars(nogc);
  std::u16string_view sv{chars, extension->length()};
  return UnicodeExtensionComponents(sv);
}

/**
 * Return `true` in |result| iff `string` is a supported calendar for the
 * requested locale. Otherwise set |result| to `false`.
 *
 * The function's own return value is `false` only when an error was
 * reported on |cx|.
 */
static bool IsSupportedCalendar(JSContext* cx, Handle loc,
                                Handle string, bool* result) {
  MOZ_ASSERT(StringIsAscii(string));

  auto locale = EncodeLocale(cx, loc);
  if (!locale) {
    return false;
  }

  auto keywords =
      mozilla::intl::Calendar::GetBcp47KeywordValuesForLocale(locale.get());
  if (keywords.isErr()) {
    ReportInternalError(cx, keywords.unwrapErr());
    return false;
  }

  for (auto keyword : keywords.unwrap()) {
    if (keyword.isErr()) {
      ReportInternalError(cx);
      return false;
    }
    auto calendar = keyword.unwrap();

    if (StringEqualsAscii(string, calendar.data(), calendar.size())) {
      *result = true;
      return true;
    }
  }

  *result = false;
  return true;
}

/**
 * Return `true` in |result| iff `string` is a supported collation for the
 * requested locale. Otherwise set |result| to `false`.
*/
static bool IsSupportedCollation(JSContext* cx, Handle loc,
                                 Handle string, bool* result) {
  MOZ_ASSERT(StringIsAscii(string));

  auto locale = EncodeLocale(cx, loc);
  if (!locale) {
    return false;
  }

  auto keywords =
      mozilla::intl::Collator::GetBcp47KeywordValuesForLocale(locale.get());
  if (keywords.isErr()) {
    ReportInternalError(cx, keywords.unwrapErr());
    return false;
  }

  for (auto keyword : keywords.unwrap()) {
    if (keyword.isErr()) {
      ReportInternalError(cx);
      return false;
    }
    auto collation = keyword.unwrap();

    // Per ECMA-402, 10.2.3, we don't include standard and search:
    //
    // The values "standard" and "search" must not be used as elements in any
    // [[SortLocaleData]].[[]].[[co]] and
    // [[SearchLocaleData]].[[]].[[co]] List.
    static constexpr auto standard = mozilla::MakeStringSpan("standard");
    static constexpr auto search = mozilla::MakeStringSpan("search");
    if (collation == standard || collation == search) {
      continue;
    }

    if (StringEqualsAscii(string, collation.data(), collation.size())) {
      *result = true;
      return true;
    }
  }

  *result = false;
  return true;
}

/**
 * Return `true` iff `string` is a supported collation "case first" value,
 * i.e. exactly one of "false", "lower", or "upper". Otherwise return `false`.
 */
template
static bool IsSupportedCollationCaseFirst(mozilla::Range string) {
  // [[CaseFirst]] is one of the String values "upper", "lower", or "false".
  static constexpr auto caseFirst = std::to_array({
      "false",
      "lower",
      "upper",
  });

  // Compare lengths first so that only whole-string matches count.
  return std::any_of(caseFirst.begin(), caseFirst.end(), [&](const auto& a) {
    return a.length() == string.length() &&
           EqualChars(a.data(), string.begin().get(), a.length());
  });
}

/**
 * IsSupportedCollationCaseFirst for a linear string; dispatches on the
 * string's character encoding.
 */
static bool IsSupportedCollationCaseFirst(const JSLinearString* string) {
  MOZ_ASSERT(StringIsAscii(string));

  JS::AutoCheckCannotGC nogc;
  if (string->hasLatin1Chars()) {
    return IsSupportedCollationCaseFirst(string->latin1Range(nogc));
  }
  return IsSupportedCollationCaseFirst(string->twoByteRange(nogc));
}

/**
 * Return `true` in |result| iff `string` is a supported collation "numeric"
 * value.
Otherwise set |result| to `false`.
 */
// Note: the answer is the function's return value; there is no separate
// |result| out-parameter here.
template
static bool IsSupportedCollationNumeric(mozilla::Range string) {
  // [[Numeric]] is a Boolean value. (We use the string representation here.)
  static constexpr auto numeric = std::to_array({
      "false",
      "true",
  });

  // Compare lengths first so that only whole-string matches count.
  return std::any_of(numeric.begin(), numeric.end(), [&](const auto& a) {
    return a.length() == string.length() &&
           EqualChars(a.data(), string.begin().get(), a.length());
  });
}

/**
 * IsSupportedCollationNumeric for a linear string; dispatches on the string's
 * character encoding.
 */
static bool IsSupportedCollationNumeric(const JSLinearString* string) {
  MOZ_ASSERT(StringIsAscii(string));

  JS::AutoCheckCannotGC nogc;
  if (string->hasLatin1Chars()) {
    return IsSupportedCollationNumeric(string->latin1Range(nogc));
  }
  return IsSupportedCollationNumeric(string->twoByteRange(nogc));
}

/**
 * Return `true` iff `string` is a supported hour cycle value, i.e. exactly
 * one of "h11", "h12", "h23", or "h24". Otherwise return `false`.
 */
template
static bool IsSupportedHourCycle(mozilla::Range string) {
  // [[LocaleData]].[[]].[[hc]] must be « null, "h11", "h12", "h23",
  // "h24" ».
  //
  // The `null` case is handled in the caller.
  static constexpr auto hourCycles = std::to_array({
      "h11",
      "h12",
      "h23",
      "h24",
  });

  // Compare lengths first so that only whole-string matches count.
  return std::any_of(hourCycles.begin(), hourCycles.end(), [&](const auto& a) {
    return a.length() == string.length() &&
           EqualChars(a.data(), string.begin().get(), a.length());
  });
}

/**
 * IsSupportedHourCycle for a possibly-null linear string. A null |string| is
 * accepted as supported; otherwise dispatches on the string's character
 * encoding.
 */
static bool IsSupportedHourCycle(const JSLinearString* string) {
  // The hour cycle value can be `null`.
  if (!string) {
    return true;
  }

  MOZ_ASSERT(StringIsAscii(string));

  JS::AutoCheckCannotGC nogc;
  if (string->hasLatin1Chars()) {
    return IsSupportedHourCycle(string->latin1Range(nogc));
  }
  return IsSupportedHourCycle(string->twoByteRange(nogc));
}

/**
 * Return `true` in |result| iff `string` is a supported numbering system.
 * Otherwise set |result| to `false`.
*/
// Note: the answer is the function's return value; there is no separate
// |result| out-parameter here.
template
static bool IsSupportedNumberingSystem(std::basic_string_view string) {
  // ICU doesn't have an API to determine the set of numbering systems supported
  // for a locale; it generally pretends that any numbering system can be used
  // with any locale. Supporting a decimal numbering system (where only the
  // digits are replaced) is easy, so we offer them all here. Algorithmic
  // numbering systems are typically tied to one locale, so for lack of
  // information we don't offer them.

  // Sorted list of allowed decimal numbering systems.
  //
  // The binary search below relies on this list being sorted.
  static constexpr auto numberingSystems = std::to_array(
      {NUMBERING_SYSTEMS_WITH_SIMPLE_DIGIT_MAPPINGS});

  return std::binary_search(numberingSystems.begin(), numberingSystems.end(),
                            string, [](const auto& a, const auto& b) {
                              return CompareChars(a.data(), a.length(),
                                                  b.data(), b.length()) < 0;
                            });
}

/**
 * IsSupportedNumberingSystem for a linear string; dispatches on the string's
 * character encoding.
 */
static bool IsSupportedNumberingSystem(const JSLinearString* string) {
  MOZ_ASSERT(StringIsAscii(string));

  JS::AutoCheckCannotGC nogc;

  if (string->hasLatin1Chars()) {
    auto* chars = string->latin1Chars(nogc);
    std::string_view sv{reinterpret_cast(chars), string->length()};
    return IsSupportedNumberingSystem(sv);
  }

  auto* chars = string->twoByteChars(nogc);
  std::u16string_view sv{chars, string->length()};
  return IsSupportedNumberingSystem(sv);
}

/**
 * Return the default calendar of a locale, as a newly allocated string
 * holding the calendar's BCP 47 type. Returns nullptr on failure.
 */
static JSLinearString* DefaultCalendar(JSContext* cx,
                                       Handle loc) {
  auto locale = EncodeLocale(cx, loc);
  if (!locale) {
    return nullptr;
  }

  auto calendar = mozilla::intl::Calendar::TryCreate(locale.get());
  if (calendar.isErr()) {
    ReportInternalError(cx, calendar.unwrapErr());
    return nullptr;
  }

  auto type = calendar.unwrap()->GetBcp47Type();
  if (type.isErr()) {
    ReportInternalError(cx, type.unwrapErr());
    return nullptr;
  }

  return NewStringCopy(cx, type.unwrap());
}

/**
 * Return the default collation "case first" value of a locale: the atom
 * "upper" if the locale sorts upper case first, otherwise the atom "false".
 * Returns nullptr on failure.
 */
static JSLinearString* DefaultCollationCaseFirst(
    JSContext* cx, Handle locale) {
  // If |locale| is the default locale (e.g. da-DK), but only supported through
  // a fallback (da), we need to get the actual locale before we can call
  // |sharedIntlData.isUpperCaseFirst|.
  Rooted actualLocale(cx);
  if (!BestAvailableLocale(cx, AvailableLocaleKind::Collator, locale, nullptr,
                           &actualLocale)) {
    return nullptr;
  }

  auto& sharedIntlData = cx->runtime()->sharedIntlData.ref();

  bool isUpperFirst;
  if (!sharedIntlData.isUpperCaseFirst(cx, actualLocale, &isUpperFirst)) {
    return nullptr;
  }

  if (isUpperFirst) {
    return cx->names().upper;
  }
  return cx->names().false_;
}

/**
 * Return the default numbering system of a locale, as a newly allocated
 * string holding the numbering system's name. Returns nullptr on failure.
 */
static JSLinearString* DefaultNumberingSystem(JSContext* cx,
                                              Handle loc) {
  auto locale = EncodeLocale(cx, loc);
  if (!locale) {
    return nullptr;
  }

  auto numberingSystem =
      mozilla::intl::NumberingSystem::TryCreate(locale.get());
  if (numberingSystem.isErr()) {
    ReportInternalError(cx, numberingSystem.unwrapErr());
    return nullptr;
  }

  auto name = numberingSystem.inspect()->GetName();
  if (name.isErr()) {
    ReportInternalError(cx, name.unwrapErr());
    return nullptr;
  }

  return NewStringCopy(cx, name.unwrap());
}

/**
 * Check if a locale supports the requested value for a Unicode extension key.
 *
 * The answer is stored in |result|. The function's own return value is
 * `false` only when one of the per-key checks reports an error.
 */
static bool IsSupported(JSContext* cx, LocaleData localeData,
                        Handle locale, UnicodeExtensionKey key,
                        Handle value, bool* result) {
  switch (key) {
    case UnicodeExtensionKey::Calendar: {
      return IsSupportedCalendar(cx, locale, value, result);
    }
    case UnicodeExtensionKey::Collation: {
      // Search collations can't use a different collation.
      if (localeData == LocaleData::CollatorSearch) {
        *result = false;
        return true;
      }
      return IsSupportedCollation(cx, locale, value, result);
    }
    case UnicodeExtensionKey::CollationCaseFirst: {
      *result = IsSupportedCollationCaseFirst(value);
      return true;
    }
    case UnicodeExtensionKey::CollationNumeric: {
      *result = IsSupportedCollationNumeric(value);
      return true;
    }
    case UnicodeExtensionKey::HourCycle: {
      *result = IsSupportedHourCycle(value);
      return true;
    }
    case UnicodeExtensionKey::NumberingSystem: {
      *result = IsSupportedNumberingSystem(value);
      return true;
    }
  }
  MOZ_CRASH("invalid Unicode extension key");
}

/**
 * Return the locale-specific default value for a Unicode extension key.
 *
 * The default is stored in |result| and is null for the Collation and
 * HourCycle keys. The function's own return value is `false` when computing
 * the default failed.
 */
static bool DefaultValue(JSContext* cx, LocaleData localeData,
                         Handle locale, UnicodeExtensionKey key,
                         MutableHandle result) {
  switch (key) {
    case UnicodeExtensionKey::Calendar: {
      auto ca = DefaultCalendar(cx, locale);
      if (!ca) {
        return false;
      }
      result.set(ca);
      return true;
    }
    case UnicodeExtensionKey::Collation: {
      // The first element of the collations array must be |null| per ES2017
      // Intl, 10.2.3 Internal Slots.
      result.set(nullptr);
      return true;
    }
    case UnicodeExtensionKey::CollationCaseFirst: {
      // Case first defaults to "false" for all search collations.
      if (localeData == LocaleData::CollatorSearch) {
        result.set(cx->names().false_);
        return true;
      }

      auto kf = DefaultCollationCaseFirst(cx, locale);
      if (!kf) {
        return false;
      }
      result.set(kf);
      return true;
    }
    case UnicodeExtensionKey::CollationNumeric: {
      // Numeric defaults to "false" for all locales.
      result.set(cx->names().false_);
      return true;
    }
    case UnicodeExtensionKey::HourCycle: {
      // The first element of [[LocaleData]].[[]].[[hc]] is |null|.
      result.set(nullptr);
      return true;
    }
    case UnicodeExtensionKey::NumberingSystem: {
      auto nu = DefaultNumberingSystem(cx, locale);
      if (!nu) {
        return false;
      }
      result.set(nu);
      return true;
    }
  }
  MOZ_CRASH("invalid Unicode extension key");
}

/**
 * ResolveLocale ( availableLocales, requestedLocales, options,
 *                 relevantExtensionKeys, localeData )
 *
 * For each relevant key the value is chosen in this order of precedence: a
 * supported value from |options|, then a supported value from the locale's
 * Unicode extension keywords, then the locale's default value.
 */
bool js::intl::ResolveLocale(
    JSContext* cx, AvailableLocaleKind availableLocales,
    Handle requestedLocales, Handle options,
    mozilla::EnumSet relevantExtensionKeys, LocaleData localeData,
    JS::MutableHandle result) {
  // Steps 1-4.
  //
  // BestFitMatcher not implemented in this implementation.
  Rooted match(cx);
  if (!LookupMatcher(cx, availableLocales, requestedLocales, &match)) {
    return false;
  }

  // Step 5.
  auto foundLocale = match.locale();

  // Steps 6-7. (Not applicable in our implementation.)

  // Step 8.
  result.set(ResolvedLocale{});

  // Step 9. (Not applicable in our implementation.)

  // Steps 10-11.
  UnicodeExtensionKeywords keywords{};
  if (match.extension()) {
    keywords = UnicodeExtensionComponents(match.extension());
  }

  // Step 12.
  mozilla::EnumSet supportedKeywords = {};

  // Step 13.
  Rooted> extensionValue(cx);
  Rooted keywordsValue(cx);
  Rooted optionsValue(cx);
  Rooted defaultValue(cx);
  for (auto key : relevantExtensionKeys) {
    // Steps 13.a-b. (Not applicable in our implementation.)
    extensionValue = mozilla::Nothing();

    // Steps 13.c-d. (Moved below)

    // Step 13.e.
    bool isSupportedKeyword = false;

    // Step 13.f.
    //
    // |keywordsValue| is only assigned here, and only read below when
    // |keywords.has(key)| holds for the current key.
    if (keywords.has(key)) {
      // Step 13.f.i.
      auto [start, length] = keywords.get(key);

      // Step 13.f.ii.
      //
      // A keyword without value subtags is treated as the value "true".
      if (length > 0) {
        MOZ_ASSERT(start + length <= match.extension()->length());

        keywordsValue =
            NewDependentString(cx, match.extension(), start, length);
        if (!keywordsValue) {
          return false;
        }
      } else {
        keywordsValue = cx->names().true_;
      }

      // Steps 13.f.iii-iv. (Moved below)
    }

    // Steps 13.g-k.
    //
    // Options override all.
    if (options.hasUnicodeExtension(key)) {
      // Step 13.g. (Not applicable in our implementation.)

      // Step 13.h.
      optionsValue = options.getUnicodeExtension(key);

      // Step 13.i. (Not applicable)

      // Step 13.j.
      //
      // String options are already canonicalized in our implementation.

      // Step 13.j.iii.i.
      //
      // No currently supported options value is an empty string.
      MOZ_ASSERT_IF(optionsValue, !optionsValue->empty());

      bool supported;
      if (!IsSupported(cx, localeData, foundLocale, key, optionsValue,
                       &supported)) {
        return false;
      }

      if (supported) {
        extensionValue = mozilla::Some(optionsValue.get());

        // The keyword is only kept in the resolved locale when it agrees
        // with the option value.
        if (optionsValue && keywords.has(key)) {
          MOZ_ASSERT(keywordsValue && !keywordsValue->empty());
          isSupportedKeyword = EqualStrings(keywordsValue, optionsValue);
        }
      }
    }

    // Steps 13.f.iii-iv.
    //
    // Locale tag may override.
    if (extensionValue.isNothing() && keywords.has(key)) {
      MOZ_ASSERT(keywordsValue && !keywordsValue->empty());

      bool supported;
      if (!IsSupported(cx, localeData, foundLocale, key, keywordsValue,
                       &supported)) {
        return false;
      }

      if (supported) {
        extensionValue = mozilla::Some(keywordsValue.get());
        isSupportedKeyword = true;
      }
    }

    // Locale data provides default value.
    if (extensionValue.isNothing()) {
      // Step 13.c. (Reordered)
      if (!DefaultValue(cx, localeData, foundLocale, key, &defaultValue)) {
        return false;
      }
      extensionValue = mozilla::Some(defaultValue.get());

      // Step 13.d. (Not applicable in our implementation.)
    }

    // Step 13.l.
    if (isSupportedKeyword) {
      supportedKeywords += key;
    }

    // Step 13.m.
    result.setUnicodeExtension(key, *extensionValue);
  }

  // Step 14.
  result.setUnicodeKeywords(supportedKeywords);

  // Step 15.
  result.setDataLocale(foundLocale);

  // Step 16.
  return true;
}

/**
 * Create a dense array holding one string value per entry of |locales|, in
 * the same order. Returns nullptr if the array allocation fails.
 */
ArrayObject* js::intl::LocalesListToArray(JSContext* cx,
                                          Handle locales) {
  auto* array = NewDenseFullyAllocatedArray(cx, locales.length());
  if (!array) {
    return nullptr;
  }
  array->setDenseInitializedLength(locales.length());

  for (size_t i = 0; i < locales.length(); i++) {
    array->initDenseElement(i, StringValue(locales[i]));
  }
  return array;
}

/**
 * Canonicalize |locales|, keep those entries that have a matching available
 * locale for |availableLocales|, and return them as an array. Returns nullptr
 * on failure.
 */
ArrayObject* js::intl::SupportedLocalesOf(JSContext* cx,
                                          AvailableLocaleKind availableLocales,
                                          Handle locales,
                                          Handle options) {
  Rooted requestedLocales(cx, cx);
  if (!CanonicalizeLocaleList(cx, locales, &requestedLocales)) {
    return nullptr;
  }

  Rooted supportedLocales(cx, cx);
  if (!SupportedLocales(cx, availableLocales, requestedLocales, options,
                        &supportedLocales)) {
    return nullptr;
  }

  return LocalesListToArray(cx, supportedLocales);
}

/**
 * Certain old, commonly-used language tags that lack a script, are expected to
 * nonetheless imply one. This object maps these old-style tags to modern
 * equivalents.
 */
struct OldStyleLanguageTagMapping {
  std::string_view oldStyle;
  std::string_view modernStyle;

  // Provide a constructor to catch missing initializers in the mappings array.
  constexpr OldStyleLanguageTagMapping(std::string_view oldStyle,
                                       std::string_view modernStyle)
      : oldStyle(oldStyle), modernStyle(modernStyle) {}
};

// Old-style tags and the script-qualified tags they are replaced with.
static constexpr OldStyleLanguageTagMapping oldStyleLanguageTagMappings[] = {
    {"pa-PK", "pa-Arab-PK"}, {"zh-CN", "zh-Hans-CN"}, {"zh-HK", "zh-Hant-HK"},
    {"zh-SG", "zh-Hans-SG"}, {"zh-TW", "zh-Hant-TW"},
};

/**
 * Return the modern, script-qualified form of |locale| if it exactly equals
 * one of the old-style tags in |oldStyleLanguageTagMappings|. Otherwise
 * return an empty string view.
 */
static std::string_view AddImplicitScriptToLocale(std::string_view locale) {
  for (const auto& [oldStyle, modernStyle] : oldStyleLanguageTagMappings) {
    if (locale == oldStyle) {
      return modernStyle;
    }
  }
  return {};
}

/**
 * Compute the default locale from the realm's locale.
 *
 * The realm locale is parsed and canonicalized, stripped of any Unicode
 * extension, and mapped to its script-qualified form where applicable. The
 * result is then checked against the Collator and DateTimeFormat available
 * locales; if either lacks a match, the last ditch locale is returned
 * instead. Returns nullptr on failure.
 */
JSLinearString* js::intl::ComputeDefaultLocale(JSContext* cx) {
  const char* locale = cx->realm()->getLocale();
  if (!locale) {
    ReportOutOfMemory(cx);
    return nullptr;
  }

  auto span = mozilla::MakeStringSpan(locale);

  mozilla::intl::Locale tag;
  bool canParseLocale =
      mozilla::intl::LocaleParser::TryParse(span, tag).isOk() &&
      tag.Canonicalize().isOk();

  Rooted candidate(cx);
  if (!canParseLocale) {
    // An unparseable realm locale falls back to the last ditch locale.
    candidate = NewStringCopy(cx, LastDitchLocale());
    if (!candidate) {
      return nullptr;
    }
  } else {
    // The default locale must be in [[AvailableLocales]], and that list must
    // not contain any locales with Unicode extension sequences, so remove any
    // present in the candidate.
    tag.ClearUnicodeExtension();

    FormatBuffer buffer(cx);
    if (auto result = tag.ToString(buffer); result.isErr()) {
      ReportInternalError(cx, result.unwrapErr());
      return nullptr;
    }

    // Certain old-style language tags lack a script code, but in current usage
    // they *would* include a script code. Map these over to modern forms.
    auto modernStyle =
        AddImplicitScriptToLocale({buffer.data(), buffer.length()});
    if (modernStyle.empty()) {
      candidate = buffer.toAsciiString(cx);
    } else {
      candidate = NewStringCopy(cx, modernStyle);
    }
    if (!candidate) {
      return nullptr;
    }
  }

  // 9.1 Internal slots of Service Constructors
  //
  // - [[AvailableLocales]] is a List [...]. The list must include the value
  //   returned by the DefaultLocale abstract operation (6.2.4), [...].
  //
  // That implies we must ignore any candidate which isn't supported by all
  // Intl service constructors.

  Rooted supportedCollator(cx);
  JS_TRY_VAR_OR_RETURN_NULL(
      cx, supportedCollator,
      BestAvailableLocale(cx, AvailableLocaleKind::Collator, candidate,
                          nullptr));

  Rooted supportedDateTimeFormat(cx);
  JS_TRY_VAR_OR_RETURN_NULL(
      cx, supportedDateTimeFormat,
      BestAvailableLocale(cx, AvailableLocaleKind::DateTimeFormat, candidate,
                          nullptr));

#ifdef DEBUG
  // Note: We don't test the supported locales of the remaining Intl service
  // constructors, because the set of supported locales is exactly equal to
  // the set of supported locales of Intl.DateTimeFormat.
  for (auto kind : {
           AvailableLocaleKind::DisplayNames,
           AvailableLocaleKind::DurationFormat,
           AvailableLocaleKind::ListFormat,
           AvailableLocaleKind::NumberFormat,
           AvailableLocaleKind::PluralRules,
           AvailableLocaleKind::RelativeTimeFormat,
           AvailableLocaleKind::Segmenter,
       }) {
    JSLinearString* supported;
    JS_TRY_VAR_OR_RETURN_NULL(
        cx, supported, BestAvailableLocale(cx, kind, candidate, nullptr));

    MOZ_ASSERT(!!supported == !!supportedDateTimeFormat);
    MOZ_ASSERT_IF(supported, EqualStrings(supported, supportedDateTimeFormat));
  }
#endif

  // Accept the candidate locale if it is supported by all Intl service
  // constructors.
  if (supportedCollator && supportedDateTimeFormat) {
    // Use the actually supported locale instead of the candidate locale. For
    // example when the candidate locale "en-US-posix" is supported through
    // "en-US", use "en-US" as the default locale.
    //
    // Also prefer the supported locale with more subtags. For example when
    // requesting "de-CH" and Intl.DateTimeFormat supports "de-CH", but
    // Intl.Collator only "de", still return "de-CH" as the result.
    if (SameOrParentLocale(supportedCollator, supportedDateTimeFormat)) {
      return supportedDateTimeFormat;
    }
    return supportedCollator;
  }

  // Return the last ditch locale if the candidate locale isn't supported.
  return NewStringCopy(cx, LastDitchLocale());
}