/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "builtin/intl/LocaleNegotiation.h"

#include "mozilla/Assertions.h"
#include "mozilla/intl/Locale.h"

#include <algorithm>
#include <iterator>
#include <stddef.h>

#include "builtin/Array.h"
#include "builtin/intl/CommonFunctions.h"
#include "builtin/intl/FormatBuffer.h"
#include "builtin/intl/SharedIntlData.h"
#include "builtin/intl/StringAsciiChars.h"
#include "js/Conversions.h"
#include "js/Result.h"
#include "vm/ArrayObject.h"
#include "vm/GlobalObject.h"
#include "vm/JSContext.h"
#include "vm/Realm.h"
#include "vm/StringType.h"

#include "vm/NativeObject-inl.h"
#include "vm/ObjectOperations-inl.h"

using namespace js;
using namespace js::intl;

/**
 * Debug-only sanity check for |locale|.
 *
 * In DEBUG builds, asserts that |locale| is ASCII-only, parses as a language
 * tag, carries no Unicode extension, and is already in canonical form (i.e.
 * re-serializing the canonicalized tag yields the same characters).
 *
 * Returns false with an error reported on |cx| if one of the fallible
 * operations (character access, parsing, canonicalization, serialization)
 * fails. In non-DEBUG builds this function does nothing and returns true.
 */
static bool AssertCanonicalLocaleWithoutUnicodeExtension(
    JSContext* cx, Handle<JSLinearString*> locale) {
#ifdef DEBUG
  MOZ_ASSERT(StringIsAscii(locale), "language tags are ASCII-only");

  // |locale| is a structurally valid language tag.
  mozilla::intl::Locale tag;

  using ParserError = mozilla::intl::LocaleParser::ParserError;
  mozilla::Result<mozilla::Ok, ParserError> parse_result = Ok();
  {
    // Scope |chars| so it is destroyed before any further work happens.
    intl::StringAsciiChars chars(locale);
    if (!chars.init(cx)) {
      return false;
    }

    parse_result = mozilla::intl::LocaleParser::TryParse(chars, tag);
  }

  if (parse_result.isErr()) {
    // The only acceptable parse failure for a valid tag is running out of
    // memory.
    MOZ_ASSERT(parse_result.unwrapErr() == ParserError::OutOfMemory,
               "locale is a structurally valid language tag");

    intl::ReportInternalError(cx);
    return false;
  }

  MOZ_ASSERT(!tag.GetUnicodeExtension(),
             "locale must contain no Unicode extensions");

  if (auto result = tag.Canonicalize(); result.isErr()) {
    MOZ_ASSERT(result.unwrapErr() !=
               mozilla::intl::Locale::CanonicalizationError::DuplicateVariant);
    intl::ReportInternalError(cx);
    return false;
  }

  intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
  if (auto result = tag.ToString(buffer); result.isErr()) {
    intl::ReportInternalError(cx, result.unwrapErr());
    return false;
  }

  MOZ_ASSERT(StringEqualsAscii(locale, buffer.data(), buffer.length()),
             "locale is a canonicalized language tag");
#endif
  return true;
}

/**
 * Returns true if |locale| is equal to |otherLocale|, or if |locale| is a
 * proper prefix of |otherLocale| that is immediately followed by '-' (for
 * example "de" and "de-CH"). Returns false otherwise.
 */
static bool SameOrParentLocale(const JSLinearString* locale,
                               const JSLinearString* otherLocale) {
  // Return true if |locale| is the same locale as |otherLocale|.
  if (locale->length() == otherLocale->length()) {
    return EqualStrings(locale, otherLocale);
  }

  // Also return true if |locale| is the parent locale of |otherLocale|.
  if (locale->length() < otherLocale->length()) {
    return HasSubstringAt(otherLocale, locale, 0) &&
           otherLocale->latin1OrTwoByteChar(locale->length()) == '-';
  }

  return false;
}

/**
 * 9.2.2 BestAvailableLocale ( availableLocales, locale )
 *
 * Compares a BCP 47 language tag against the locales in availableLocales and
 * returns the best available match. Uses the fallback mechanism of RFC 4647,
 * section 3.4.
 *
 * Returns the matching locale, nullptr if no candidate matched, or an error
 * result if an error was reported on |cx|. |defaultLocale| may be null; when
 * non-null, a candidate which is the same as or a parent of |defaultLocale| is
 * also accepted.
 *
 * Spec: ECMAScript Internationalization API Specification, 9.2.2.
 * Spec: RFC 4647, section 3.4.
 */
static JS::Result<JSLinearString*> BestAvailableLocale(
    JSContext* cx, AvailableLocaleKind availableLocales,
    Handle<JSLinearString*> locale, Handle<JSLinearString*> defaultLocale) {
  // In the spec, [[availableLocales]] is formally a list of all available
  // locales. But in our implementation, it's an *incomplete* list, not
  // necessarily including the default locale (and all locales implied by it,
  // e.g. "de" implied by "de-CH"), if that locale isn't in every
  // [[availableLocales]] list (because that locale is supported through
  // fallback, e.g. "de-CH" supported through "de").
  //
  // If we're considering the default locale, augment the spec loop with
  // additional checks to also test whether the current prefix is a prefix of
  // the default locale.

  intl::SharedIntlData& sharedIntlData = cx->runtime()->sharedIntlData.ref();

  // Returns the index of the last '-' in |chars|, or -1 if there is none.
  auto findLast = [](const auto* chars, size_t length) {
    auto rbegin = std::make_reverse_iterator(chars + length);
    auto rend = std::make_reverse_iterator(chars);
    auto p = std::find(rbegin, rend, '-');

    // |dist(chars, p.base())| is equal to |dist(p, rend)|, pick whichever you
    // find easier to reason about when using reverse iterators.
    ptrdiff_t r = std::distance(chars, p.base());
    MOZ_ASSERT(r == std::distance(p, rend));

    // But always subtract one to convert from the reverse iterator result to
    // the corresponding forward iterator value, because reverse iterators
    // point to one element past the forward iterator value.
    return r - 1;
  };

  if (!AssertCanonicalLocaleWithoutUnicodeExtension(cx, locale)) {
    return cx->alreadyReportedError();
  }

  // Step 1.
  Rooted<JSLinearString*> candidate(cx, locale);

  // Step 2.
  while (true) {
    // Step 2.a.
    bool supported = false;
    if (!sharedIntlData.isAvailableLocale(cx, availableLocales, candidate,
                                          &supported)) {
      return cx->alreadyReportedError();
    }
    if (supported) {
      return candidate.get();
    }

    if (defaultLocale && SameOrParentLocale(candidate, defaultLocale)) {
      return candidate.get();
    }

    // Step 2.b.
    ptrdiff_t pos;
    if (candidate->hasLatin1Chars()) {
      JS::AutoCheckCannotGC nogc;
      pos = findLast(candidate->latin1Chars(nogc), candidate->length());
    } else {
      JS::AutoCheckCannotGC nogc;
      pos = findLast(candidate->twoByteChars(nogc), candidate->length());
    }

    if (pos < 0) {
      return nullptr;
    }

    // Step 2.c.
    //
    // If the subtag preceding the removed one is a single character, drop it
    // (and its leading '-') as well.
    size_t length = size_t(pos);
    if (length >= 2 && candidate->latin1OrTwoByteChar(length - 2) == '-') {
      length -= 2;
    }

    // Step 2.d.
    candidate = NewDependentString(cx, candidate, 0, length);
    if (!candidate) {
      return cx->alreadyReportedError();
    }
  }
}

// 9.2.2 BestAvailableLocale ( availableLocales, locale )
//
// Carries an additional third argument in our implementation to provide the
// default locale. See the doc-comment in the header file.
//
// On success returns true and stores the best available locale in |result|,
// or nullptr when no locale matched. Returns false if an error was reported.
bool js::intl::BestAvailableLocale(JSContext* cx,
                                   AvailableLocaleKind availableLocales,
                                   Handle<JSLinearString*> locale,
                                   Handle<JSLinearString*> defaultLocale,
                                   MutableHandle<JSLinearString*> result) {
  JSLinearString* res;
  JS_TRY_VAR_OR_RETURN_FALSE(
      cx, res,
      BestAvailableLocale(cx, availableLocales, locale, defaultLocale));

  // |res| is nullptr when no locale matched.
  result.set(res);
  return true;
}

/**
 * Returns the length of the leading part of |locale| which precedes the first
 * single-character subtag, i.e. the index of the first '-' that is followed by
 * exactly one character and another '-'. Returns the full length if there is
 * no such subtag.
 *
 * Release-asserts that every '-' visited is followed by at least two more
 * characters.
 */
template <typename CharT>
static size_t BaseNameLength(mozilla::Range<const CharT> locale) {
  // Search for the start of the first singleton subtag.
  for (size_t i = 0; i < locale.length(); i++) {
    if (locale[i] == '-') {
      MOZ_RELEASE_ASSERT(i + 2 < locale.length(), "invalid locale");
      if (locale[i + 2] == '-') {
        return i;
      }
    }
  }
  return locale.length();
}

/**
 * Character-encoding dispatch for BaseNameLength: forwards to the templated
 * overload with the Latin-1 or two-byte character range of |locale|.
 */
static size_t BaseNameLength(JSLinearString* locale) {
  JS::AutoCheckCannotGC nogc;
  if (locale->hasLatin1Chars()) {
    return BaseNameLength(locale->latin1Range(nogc));
  }
  return BaseNameLength(locale->twoByteRange(nogc));
}

/**
 * Returns the subset of requestedLocales for which availableLocales has a
 * matching (possibly fallback) locale. Locales appear in the same order in the
 * returned list as in the input list.
 *
 * Matching is performed on the base name of each requested locale, but the
 * requested locale itself (including any extension sequences) is what gets
 * appended to |supportedLocales|. |supportedLocales| must be empty on entry.
 *
 * Spec: ECMAScript Internationalization API Specification, 9.2.7.
 * Spec: ECMAScript Internationalization API Specification, 9.2.8.
 */
static bool LookupSupportedLocales(
    JSContext* cx, AvailableLocaleKind availableLocales,
    Handle<LocalesList> requestedLocales,
    MutableHandle<LocalesList> supportedLocales) {
  // Step 1.
  MOZ_ASSERT(supportedLocales.empty());

  Rooted<JSLinearString*> defaultLocale(
      cx, cx->global()->globalIntlData().defaultLocale(cx));
  if (!defaultLocale) {
    return false;
  }

  // Step 2.
  Rooted<JSLinearString*> noExtensionsLocale(cx);
  for (size_t i = 0; i < requestedLocales.length(); i++) {
    auto locale = requestedLocales[i];

    // Step 2.a.
    //
    // Use the base name to ignore any extension sequences.
    noExtensionsLocale =
        NewDependentString(cx, locale, 0, BaseNameLength(locale));
    if (!noExtensionsLocale) {
      return false;
    }

    // Step 2.b.
    //
    // Only tested for null below, so an unrooted pointer is sufficient.
    JSLinearString* availableLocale;
    JS_TRY_VAR_OR_RETURN_FALSE(
        cx, availableLocale,
        BestAvailableLocale(cx, availableLocales, noExtensionsLocale,
                            defaultLocale));

    // Step 2.c.
    if (availableLocale) {
      if (!supportedLocales.append(locale)) {
        return false;
      }
    }
  }

  // Step 3.
  return true;
}

/**
 * Returns the subset of requestedLocales for which availableLocales has a
 * matching (possibly fallback) locale. Locales appear in the same order in the
 * returned list as in the input list.
 *
 * If |options| is not undefined, it is converted to an object and its
 * "localeMatcher" property is read. A present value must stringify to either
 * "lookup" or "best fit", otherwise JSMSG_INVALID_LOCALE_MATCHER is reported
 * and false is returned. The lookup matcher is used regardless of the
 * requested matcher.
 *
 * Spec: ECMAScript Internationalization API Specification, 9.2.9.
 */
static bool SupportedLocales(JSContext* cx,
                             AvailableLocaleKind availableLocales,
                             Handle<LocalesList> requestedLocales,
                             Handle<JS::Value> options,
                             MutableHandle<LocalesList> supportedLocales) {
  // Step 1.
  if (!options.isUndefined()) {
    // Step 1.a.
    Rooted<JSObject*> obj(cx, ToObject(cx, options));
    if (!obj) {
      return false;
    }

    // Step 1.b.
    Rooted<JS::Value> localeMatcher(cx);
    if (!GetProperty(cx, obj, obj, cx->names().localeMatcher,
                     &localeMatcher)) {
      return false;
    }

    if (!localeMatcher.isUndefined()) {
      JSString* str = ToString(cx, localeMatcher);
      if (!str) {
        return false;
      }

      JSLinearString* linear = str->ensureLinear(cx);
      if (!linear) {
        return false;
      }

      if (!StringEqualsLiteral(linear, "lookup") &&
          !StringEqualsLiteral(linear, "best fit")) {
        // If quoting the string fails, still return false below.
        if (auto chars = QuoteString(cx, linear)) {
          JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                    JSMSG_INVALID_LOCALE_MATCHER, chars.get());
        }
        return false;
      }
    }
  }

  // Steps 2-5.
  //
  // We don't yet support anything better than the lookup matcher.
  return LookupSupportedLocales(cx, availableLocales, requestedLocales,
                                supportedLocales);
}

/**
 * Creates a dense array holding one string value per entry of |locales|, in
 * the same order. Returns nullptr if the array could not be allocated.
 */
ArrayObject* js::intl::LocalesListToArray(JSContext* cx,
                                          Handle<LocalesList> locales) {
  auto* array = NewDenseFullyAllocatedArray(cx, locales.length());
  if (!array) {
    return nullptr;
  }
  array->setDenseInitializedLength(locales.length());

  for (size_t i = 0; i < locales.length(); i++) {
    array->initDenseElement(i, StringValue(locales[i]));
  }
  return array;
}

/**
 * Canonicalizes |locales| into a locale list, filters it through
 * SupportedLocales for |availableLocales| and |options|, and returns the
 * result as an array. Returns nullptr if any step fails.
 */
ArrayObject* js::intl::SupportedLocalesOf(JSContext* cx,
                                          AvailableLocaleKind availableLocales,
                                          Handle<JS::Value> locales,
                                          Handle<JS::Value> options) {
  Rooted<LocalesList> requestedLocales(cx, cx);
  if (!CanonicalizeLocaleList(cx, locales, &requestedLocales)) {
    return nullptr;
  }

  Rooted<LocalesList> supportedLocales(cx, cx);
  if (!SupportedLocales(cx, availableLocales, requestedLocales, options,
                        &supportedLocales)) {
    return nullptr;
  }

  return LocalesListToArray(cx, supportedLocales);
}

/**
 * Computes the default locale from the realm's locale string.
 *
 * The realm locale is parsed and canonicalized; if that fails, the last ditch
 * locale is used as the candidate. Otherwise any Unicode extension is removed
 * and old-style language tags are mapped to their modern forms. The candidate
 * is then matched against the locales available to Intl.Collator and
 * Intl.DateTimeFormat. If both support it, the supported locale with more
 * subtags is returned; otherwise the last ditch locale is returned.
 *
 * Returns nullptr if an error was reported on |cx|.
 */
JSLinearString* js::intl::ComputeDefaultLocale(JSContext* cx) {
  const char* locale = cx->realm()->getLocale();
  if (!locale) {
    ReportOutOfMemory(cx);
    return nullptr;
  }

  auto span = mozilla::MakeStringSpan(locale);

  mozilla::intl::Locale tag;
  bool canParseLocale =
      mozilla::intl::LocaleParser::TryParse(span, tag).isOk() &&
      tag.Canonicalize().isOk();

  Rooted<JSLinearString*> candidate(cx);
  if (!canParseLocale) {
    candidate = NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale());
    if (!candidate) {
      return nullptr;
    }
  } else {
    // The default locale must be in [[AvailableLocales]], and that list must
    // not contain any locales with Unicode extension sequences, so remove any
    // present in the candidate.
    tag.ClearUnicodeExtension();

    intl::FormatBuffer<char, intl::INITIAL_CHAR_BUFFER_SIZE> buffer(cx);
    if (auto result = tag.ToString(buffer); result.isErr()) {
      intl::ReportInternalError(cx, result.unwrapErr());
      return nullptr;
    }

    candidate = buffer.toAsciiString(cx);
    if (!candidate) {
      return nullptr;
    }

    // Certain old-style language tags lack a script code, but in current
    // usage they *would* include a script code. Map these over to modern
    // forms.
    for (const auto& mapping : js::intl::oldStyleLanguageTagMappings) {
      const char* oldStyle = mapping.oldStyle;
      const char* modernStyle = mapping.modernStyle;

      if (StringEqualsAscii(candidate, oldStyle)) {
        candidate = NewStringCopyZ<CanGC>(cx, modernStyle);
        if (!candidate) {
          return nullptr;
        }
        break;
      }
    }
  }

  // 9.1 Internal slots of Service Constructors
  //
  // - [[AvailableLocales]] is a List [...]. The list must include the value
  //   returned by the DefaultLocale abstract operation (6.2.4), [...].
  //
  // That implies we must ignore any candidate which isn't supported by all
  // Intl service constructors.

  Rooted<JSLinearString*> supportedCollator(cx);
  JS_TRY_VAR_OR_RETURN_NULL(
      cx, supportedCollator,
      BestAvailableLocale(cx, AvailableLocaleKind::Collator, candidate,
                          nullptr));

  Rooted<JSLinearString*> supportedDateTimeFormat(cx);
  JS_TRY_VAR_OR_RETURN_NULL(
      cx, supportedDateTimeFormat,
      BestAvailableLocale(cx, AvailableLocaleKind::DateTimeFormat, candidate,
                          nullptr));

#ifdef DEBUG
  // Note: We don't test the supported locales of the remaining Intl service
  // constructors, because the set of supported locales is exactly equal to
  // the set of supported locales of Intl.DateTimeFormat.
  for (auto kind : {
           AvailableLocaleKind::DisplayNames,
           AvailableLocaleKind::DurationFormat,
           AvailableLocaleKind::ListFormat,
           AvailableLocaleKind::NumberFormat,
           AvailableLocaleKind::PluralRules,
           AvailableLocaleKind::RelativeTimeFormat,
           AvailableLocaleKind::Segmenter,
       }) {
    JSLinearString* supported;
    JS_TRY_VAR_OR_RETURN_NULL(
        cx, supported, BestAvailableLocale(cx, kind, candidate, nullptr));

    MOZ_ASSERT(!!supported == !!supportedDateTimeFormat);
    MOZ_ASSERT_IF(supported, EqualStrings(supported, supportedDateTimeFormat));
  }
#endif

  // Accept the candidate locale if it is supported by all Intl service
  // constructors.
  if (supportedCollator && supportedDateTimeFormat) {
    // Use the actually supported locale instead of the candidate locale. For
    // example when the candidate locale "en-US-posix" is supported through
    // "en-US", use "en-US" as the default locale.
    //
    // Also prefer the supported locale with more subtags. For example when
    // requesting "de-CH" and Intl.DateTimeFormat supports "de-CH", but
    // Intl.Collator only "de", still return "de-CH" as the result.
    if (SameOrParentLocale(supportedCollator, supportedDateTimeFormat)) {
      return supportedDateTimeFormat;
    }
    return supportedCollator;
  }

  // Return the last ditch locale if the candidate locale isn't supported.
  return NewStringCopyZ<CanGC>(cx, intl::LastDitchLocale());
}