/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "builtin/RegExp.h"

#include "mozilla/Casting.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/TextUtils.h"

#include "jsapi.h"

#include "frontend/FrontendContext.h"  // AutoReportFrontendContext
#include "frontend/TokenStream.h"
#include "irregexp/RegExpAPI.h"
#include "jit/InlinableNatives.h"
#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_NEWREGEXP_FLAGGED
#include "js/PropertySpec.h"
#include "js/RegExpFlags.h"  // JS::RegExpFlag, JS::RegExpFlags
#include "util/StringBuilder.h"
#include "vm/EqualityOperations.h"
#include "vm/Interpreter.h"
#include "vm/JSContext.h"
#include "vm/RegExpObject.h"
#include "vm/RegExpStatics.h"
#include "vm/SelfHosting.h"

#include "vm/EnvironmentObject-inl.h"
#include "vm/GeckoProfiler-inl.h"
#include "vm/JSObject-inl.h"
#include "vm/ObjectOperations-inl.h"
#include "vm/PlainObject-inl.h"

using namespace js;

using mozilla::AssertedCast;
using mozilla::CheckedInt;
using mozilla::IsAsciiDigit;

using JS::CompileOptions;
using JS::RegExpFlag;
using JS::RegExpFlags;

// Allocate an object for the |.groups| or |.indices.groups| property
// of a regexp match result.
//
// Returns nullptr on failure. When |groupsTemplate| is in dictionary mode a
// fresh null-prototype plain object is returned and the caller has to define
// the properties itself.
static PlainObject* CreateGroupsObject(JSContext* cx,
                                       Handle<PlainObject*> groupsTemplate) {
  if (groupsTemplate->inDictionaryMode()) {
    return NewPlainObjectWithProto(cx, nullptr);
  }

  // The groups template object is stored in RegExpShared, which is shared
  // across compartments and realms. So watch out for the case when the template
  // object's realm is different from the current realm.
  if (cx->realm() != groupsTemplate->realm()) {
    return PlainObject::createWithTemplateFromDifferentRealm(cx,
                                                             groupsTemplate);
  }

  return PlainObject::createWithTemplate(cx, groupsTemplate);
}

// Fetch the match value for the |i|-th distinct named capture group.
//
// |arr| is the match result array. On return |val| holds the element of |arr|
// selected for the group and |valueIndex| holds the index it was read from.
//
// If every group name is unique there is exactly one capture index per name.
// Otherwise several captures share the name: the first one whose element is
// not undefined wins. If all of them are undefined, |valueIndex| stays at the
// first index of the slice and |val| is undefined.
static inline void getValueAndIndex(HandleRegExpShared re, uint32_t i,
                                    Handle<ArrayObject*> arr,
                                    MutableHandleValue val,
                                    uint32_t& valueIndex) {
  if (re->numNamedCaptures() == re->numDistinctNamedCaptures()) {
    valueIndex = re->getNamedCaptureIndex(i);
    val.set(arr->getDenseElement(valueIndex));
  } else {
    mozilla::Span<uint32_t> indicesSlice = re->getNamedCaptureIndices(i);
    MOZ_ASSERT(!indicesSlice.IsEmpty());
    valueIndex = indicesSlice[0];
    for (uint32_t index : indicesSlice) {
      val.set(arr->getDenseElement(index));
      if (!val.isUndefined()) {
        valueIndex = index;
        break;
      }
    }
  }
}

/*
 * Implements RegExpBuiltinExec: Steps 18-35
 * https://tc39.es/ecma262/#sec-regexpbuiltinexec
 *
 * Builds the match result array for |matches| (which must contain at least
 * the whole-match pair) and stores it in |rval|. Returns false on failure.
 */
bool js::CreateRegExpMatchResult(JSContext* cx, HandleRegExpShared re,
                                 HandleString input, const MatchPairs& matches,
                                 MutableHandleValue rval) {
  MOZ_ASSERT(re);
  MOZ_ASSERT(input);

  /*
   * Create the (slow) result array for a match.
   *
   * Array contents:
   *  0:              matched string
   *  1..pairCount-1: paren matches
   *  input:          input string
   *  index:          start index for the match
   *  groups:         named capture groups for the match
   *  indices:        capture indices for the match, if required
   */

  bool hasIndices = re->hasIndices();

  // Get the shape for the output object.
  RegExpRealm::ResultShapeKind kind =
      hasIndices ? RegExpRealm::ResultShapeKind::WithIndices
                 : RegExpRealm::ResultShapeKind::Normal;
  Rooted<SharedShape*> shape(
      cx, cx->global()->regExpRealm().getOrCreateMatchResultShape(cx, kind));
  if (!shape) {
    return false;
  }

  // Steps 18-19
  size_t numPairs = matches.length();
  MOZ_ASSERT(numPairs > 0);

  // Steps 20-21: Allocate the match result object.
  Rooted<ArrayObject*> arr(
      cx, NewDenseFullyAllocatedArrayWithShape(cx, numPairs, shape));
  if (!arr) {
    return false;
  }

  // Steps 28-29 and 33 a-d: Initialize the elements of the match result.
  // Store a Value for each match pair.
  for (size_t i = 0; i < numPairs; i++) {
    const MatchPair& pair = matches[i];

    if (pair.isUndefined()) {
      MOZ_ASSERT(i != 0);  // Since we had a match, first pair must be present.
      arr->setDenseInitializedLength(i + 1);
      arr->initDenseElement(i, UndefinedValue());
    } else {
      JSLinearString* str =
          NewDependentString(cx, input, pair.start, pair.length());
      if (!str) {
        return false;
      }
      arr->setDenseInitializedLength(i + 1);
      arr->initDenseElement(i, StringValue(str));
    }
  }

  // Step 34a (reordered): Allocate and initialize the indices object if needed.
  // This is an inlined implementation of MakeIndicesArray:
  // https://tc39.es/ecma262/#sec-makeindicesarray
  Rooted<ArrayObject*> indices(cx);
  Rooted<PlainObject*> indicesGroups(cx);
  if (hasIndices) {
    // MakeIndicesArray: step 8
    Rooted<SharedShape*> indicesShape(
        cx, cx->global()->regExpRealm().getOrCreateMatchResultShape(
                cx, RegExpRealm::ResultShapeKind::Indices));
    if (!indicesShape) {
      return false;
    }
    indices = NewDenseFullyAllocatedArrayWithShape(cx, numPairs, indicesShape);
    if (!indices) {
      return false;
    }

    // MakeIndicesArray: steps 10-12
    if (re->numNamedCaptures() > 0) {
      Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
      indicesGroups = CreateGroupsObject(cx, groupsTemplate);
      if (!indicesGroups) {
        return false;
      }
      indices->initSlot(RegExpRealm::IndicesGroupsSlot,
                        ObjectValue(*indicesGroups));
    }

    // MakeIndicesArray: step 13 a-d. (Step 13.e is implemented below.)
    for (size_t i = 0; i < numPairs; i++) {
      const MatchPair& pair = matches[i];

      if (pair.isUndefined()) {
        // Since we had a match, first pair must be present.
        MOZ_ASSERT(i != 0);
        indices->setDenseInitializedLength(i + 1);
        indices->initDenseElement(i, UndefinedValue());
      } else {
        ArrayObject* indexPair = NewDenseFullyAllocatedArray(cx, 2);
        if (!indexPair) {
          return false;
        }
        indexPair->setDenseInitializedLength(2);
        indexPair->initDenseElement(0, Int32Value(pair.start));
        indexPair->initDenseElement(1, Int32Value(pair.limit));

        indices->setDenseInitializedLength(i + 1);
        indices->initDenseElement(i, ObjectValue(*indexPair));
      }
    }
  }

  // Steps 30-31 (reordered): Allocate the groups object (if needed).
  Rooted<PlainObject*> groups(cx);
  bool groupsInDictionaryMode = false;
  if (re->numNamedCaptures() > 0) {
    Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
    groupsInDictionaryMode = groupsTemplate->inDictionaryMode();
    groups = CreateGroupsObject(cx, groupsTemplate);
    if (!groups) {
      return false;
    }
  }

  // Step 33 e-f: Initialize the properties of |groups| and |indices.groups|.
  // The groups template object stores the names of the named captures
  // in the order in which they are defined. The named capture
  // indices vector stores the corresponding capture indices. In
  // dictionary mode, we have to define the properties explicitly. If
  // we are not in dictionary mode, we simply fill in the slots with
  // the correct values.
  if (groupsInDictionaryMode) {
    RootedIdVector keys(cx);
    Rooted<PlainObject*> groupsTemplate(cx, re->getGroupsTemplate());
    if (!GetPropertyKeys(cx, groupsTemplate, 0, &keys)) {
      return false;
    }
    MOZ_ASSERT(keys.length() == re->numDistinctNamedCaptures());
    RootedId key(cx);
    RootedValue val(cx);
    uint32_t valueIndex;
    for (uint32_t i = 0; i < keys.length(); i++) {
      key = keys[i];
      getValueAndIndex(re, i, arr, &val, valueIndex);
      if (!NativeDefineDataProperty(cx, groups, key, val, JSPROP_ENUMERATE)) {
        return false;
      }
      // MakeIndicesArray: Step 13.e (reordered)
      if (hasIndices) {
        val = indices->getDenseElement(valueIndex);
        if (!NativeDefineDataProperty(cx, indicesGroups, key, val,
                                      JSPROP_ENUMERATE)) {
          return false;
        }
      }
    }
  } else {
    RootedValue val(cx);
    uint32_t valueIndex;

    // When the regexp has no named captures the loop body never runs, so the
    // null |groups| / |indicesGroups| roots are not dereferenced.
    for (uint32_t i = 0; i < re->numDistinctNamedCaptures(); i++) {
      getValueAndIndex(re, i, arr, &val, valueIndex);
      groups->initSlot(i, val);

      // MakeIndicesArray: Step 13.e (reordered)
      if (hasIndices) {
        indicesGroups->initSlot(i, indices->getDenseElement(valueIndex));
      }
    }
  }

  // Step 22 (reordered).
  // Set the |index| property.
  arr->initSlot(RegExpRealm::MatchResultObjectIndexSlot,
                Int32Value(matches[0].start));

  // Step 23 (reordered).
  // Set the |input| property.
  arr->initSlot(RegExpRealm::MatchResultObjectInputSlot, StringValue(input));

  // Step 32 (reordered)
  // Set the |groups| property.
  if (groups) {
    arr->initSlot(RegExpRealm::MatchResultObjectGroupsSlot,
                  ObjectValue(*groups));
  }

  // Step 34b
  // Set the |indices| property.
  if (hasIndices) {
    arr->initSlot(RegExpRealm::MatchResultObjectIndicesSlot,
                  ObjectValue(*indices));
  }

#ifdef DEBUG
  // Check that the fixed slots written above are the ones the |index| and
  // |input| properties actually read from.
  RootedValue test(cx);
  RootedId id(cx, NameToId(cx->names().index));
  if (!NativeGetProperty(cx, arr, id, &test)) {
    return false;
  }
  MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectIndexSlot));
  id = NameToId(cx->names().input);
  if (!NativeGetProperty(cx, arr, id, &test)) {
    return false;
  }
  MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectInputSlot));
#endif

  // Step 35.
  rval.setObject(*arr);
  return true;
}

// Record the end of the whole match in |cx->regExpSearcherLastLimit| and
// return the start of the whole match.
//
// The last-limit field must still hold its sentinel value on entry.
static int32_t CreateRegExpSearchResult(JSContext* cx,
                                        const MatchPairs& matches) {
  MOZ_ASSERT(matches[0].start >= 0);
  MOZ_ASSERT(matches[0].limit >= 0);

  MOZ_ASSERT(cx->regExpSearcherLastLimit == RegExpSearcherLastLimitSentinel);

#ifdef DEBUG
  static_assert(JSString::MAX_LENGTH < RegExpSearcherLastLimitSentinel);
  MOZ_ASSERT(uint32_t(matches[0].limit) < RegExpSearcherLastLimitSentinel);
#endif

  cx->regExpSearcherLastLimit = matches[0].limit;
  return matches[0].start;
}

/*
 * https://github.com/tc39/proposal-regexp-legacy-features/blob/master/README.md#regexpbuiltinexec--r-s-
 *
 * Decide whether a successful match on |regexp| should update the legacy
 * RegExp statics. With the legacy-regexp pref disabled this is always true.
 */
static bool ShouldUpdateRegExpStatics(JSContext* cx,
                                      Handle<RegExpObject*> regexp) {
  if (!JS::Prefs::experimental_legacy_regexp()) {
    return true;
  }

  // Step 5. Let thisRealm be the current Realm Record.
  JS::Realm* thisRealm = cx->realm();

  // Step 6. Let rRealm be the value of R's [[Realm]] internal slot.
  JS::Realm* rRealm = regexp->realm();

  // Step 7. If SameValue(thisRealm, rRealm) is true, then
  if (thisRealm == rRealm) {
    return regexp->legacyFeaturesEnabled();
  }
  return false;
}

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
 *
 * Runs |re| against |input| starting at |searchIndex|, filling |matches|.
 * |res| may be null, in which case the statics are left untouched.
 */
static RegExpRunStatus ExecuteRegExpImpl(JSContext* cx, RegExpStatics* res,
                                         MutableHandleRegExpShared re,
                                         Handle<JSLinearString*> input,
                                         size_t searchIndex,
                                         VectorMatchPairs* matches,
                                         Handle<RegExpObject*> regexp) {
  RegExpRunStatus status =
      RegExpShared::execute(cx, re, input, searchIndex, matches);

  /* Out of spec: Update RegExpStatics. */
  if (status == RegExpRunStatus::Success && res) {
    if (ShouldUpdateRegExpStatics(cx, regexp)) {
      if (!res->updateFromMatchPairs(cx, input, *matches)) {
        return RegExpRunStatus::Error;
      }
    } else {
      res->invalidate();
    }
  }
  return status;
}

/* Legacy ExecuteRegExp behavior is baked into the JSAPI.
*/ bool js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res, Handle reobj, Handle input, size_t* lastIndex, bool test, MutableHandleValue rval) { cx->check(reobj, input); RootedRegExpShared shared(cx, RegExpObject::getShared(cx, reobj)); if (!shared) { return false; } VectorMatchPairs matches; RegExpRunStatus status = ExecuteRegExpImpl(cx, res, &shared, input, *lastIndex, &matches, reobj); if (status == RegExpRunStatus::Error) { return false; } if (status == RegExpRunStatus::Success_NotFound) { /* ExecuteRegExp() previously returned an array or null. */ rval.setNull(); return true; } *lastIndex = matches[0].limit; if (test) { /* Forbid an array, as an optimization. */ rval.setBoolean(true); return true; } return CreateRegExpMatchResult(cx, shared, input, matches, rval); } static bool CheckPatternSyntaxSlow(JSContext* cx, Handle pattern, RegExpFlags flags) { LifoAllocScope allocScope(&cx->tempLifoAlloc()); AutoReportFrontendContext fc(cx); CompileOptions options(cx); frontend::DummyTokenStream dummyTokenStream(&fc, options); return irregexp::CheckPatternSyntax(cx, cx->stackLimitForCurrentPrincipal(), dummyTokenStream, pattern, flags); } static RegExpShared* CheckPatternSyntax(JSContext* cx, Handle pattern, RegExpFlags flags) { // If we already have a RegExpShared for this pattern/flags, we can // avoid the much slower CheckPatternSyntaxSlow call. RootedRegExpShared shared(cx, cx->zone()->regExps().maybeGet(pattern, flags)); if (shared) { #ifdef DEBUG // Assert the pattern is valid. if (!CheckPatternSyntaxSlow(cx, pattern, flags)) { MOZ_ASSERT(cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed()); return nullptr; } #endif return shared; } if (!CheckPatternSyntaxSlow(cx, pattern, flags)) { return nullptr; } // Allocate and return a new RegExpShared so we will hit the fast path // next time. return cx->zone()->regExps().get(cx, pattern, flags); } /* * ES 2016 draft Mar 25, 2016 21.2.3.2.2. * * Steps 14-15 set |obj|'s "lastIndex" property to zero. 
Some of * RegExpInitialize's callers have a fresh RegExp not yet exposed to script: * in these cases zeroing "lastIndex" is infallible. But others have a RegExp * whose "lastIndex" property might have been made non-writable: here, zeroing * "lastIndex" can fail. We efficiently solve this problem by completely * removing "lastIndex" zeroing from the provided function. * * CALLERS MUST HANDLE "lastIndex" ZEROING THEMSELVES! * * Because this function only ever returns a user-provided |obj| in the spec, * we omit it and just return the usual success/failure. */ static bool RegExpInitializeIgnoringLastIndex(JSContext* cx, Handle obj, HandleValue patternValue, HandleValue flagsValue) { Rooted pattern(cx); if (patternValue.isUndefined()) { /* Step 1. */ pattern = cx->names().empty_; } else { /* Step 2. */ pattern = ToAtom(cx, patternValue); if (!pattern) { return false; } } /* Step 3. */ RegExpFlags flags = RegExpFlag::NoFlags; if (!flagsValue.isUndefined()) { /* Step 4. */ RootedString flagStr(cx, ToString(cx, flagsValue)); if (!flagStr) { return false; } /* Step 5. */ if (!ParseRegExpFlags(cx, flagStr, &flags)) { return false; } } /* Steps 7-8. */ RegExpShared* shared = CheckPatternSyntax(cx, pattern, flags); if (!shared) { return false; } /* Steps 9-12. */ obj->initIgnoringLastIndex(pattern, flags); obj->setShared(shared); return true; } /* ES 2016 draft Mar 25, 2016 21.2.3.2.3. */ bool js::RegExpCreate(JSContext* cx, HandleValue patternValue, HandleValue flagsValue, MutableHandleValue rval, HandleObject newTarget) { /* Step 1. */ Rooted regexp(cx, RegExpAlloc(cx, GenericObject, newTarget)); if (!regexp) { return false; } /* Step 2. */ if (!RegExpInitializeIgnoringLastIndex(cx, regexp, patternValue, flagsValue)) { return false; } regexp->zeroLastIndex(cx); rval.setObject(*regexp); return true; } MOZ_ALWAYS_INLINE bool IsRegExpObject(HandleValue v) { return v.isObject() && v.toObject().is(); } /* ES6 draft rc3 7.2.8. 
*/ bool js::IsRegExp(JSContext* cx, HandleValue value, bool* result) { /* Step 1. */ if (!value.isObject()) { *result = false; return true; } RootedObject obj(cx, &value.toObject()); /* Steps 2-3. */ RootedValue isRegExp(cx); RootedId matchId(cx, PropertyKey::Symbol(cx->wellKnownSymbols().match)); if (!GetProperty(cx, obj, obj, matchId, &isRegExp)) { return false; } /* Step 4. */ if (!isRegExp.isUndefined()) { *result = ToBoolean(isRegExp); return true; } /* Steps 5-6. */ ESClass cls; if (!GetClassOfValue(cx, value, &cls)) { return false; } *result = cls == ESClass::RegExp; return true; } // The "lastIndex" property is non-configurable, but it can be made // non-writable. If CalledFromJit is true, we have emitted guards to ensure it's // writable. template static bool SetLastIndex(JSContext* cx, Handle regexp, int32_t lastIndex) { MOZ_ASSERT(lastIndex >= 0); if (CalledFromJit || MOZ_LIKELY(RegExpObject::isInitialShape(regexp)) || regexp->lookupPure(cx->names().lastIndex)->writable()) { regexp->setLastIndex(cx, lastIndex); return true; } Rooted val(cx, Int32Value(lastIndex)); return SetProperty(cx, regexp, cx->names().lastIndex, val); } /* * RegExp.prototype.compile ( pattern, flags ) * https://github.com/tc39/proposal-regexp-legacy-features?tab=readme-ov-file#regexpprototypecompile--pattern-flags- * ES6 B.2.5.1. */ MOZ_ALWAYS_INLINE bool regexp_compile_impl(JSContext* cx, const CallArgs& args) { MOZ_ASSERT(IsRegExpObject(args.thisv())); Rooted regexp(cx, &args.thisv().toObject().as()); // Step 7. If Type(pattern) is Object and pattern has a [[RegExpMatcher]] // internal slot, then RootedValue patternValue(cx, args.get(0)); ESClass cls; if (!GetClassOfValue(cx, patternValue, &cls)) { return false; } if (cls == ESClass::RegExp) { // Step 7.i. If flags is not undefined, throw a TypeError exception. if (args.hasDefined(1)) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_NEWREGEXP_FLAGGED); return false; } // Beware! 
|patternObj| might be a proxy into another compartment, so // don't assume |patternObj.is()|. For the same reason, // don't reuse the RegExpShared below. RootedObject patternObj(cx, &patternValue.toObject()); Rooted sourceAtom(cx); RegExpFlags flags = RegExpFlag::NoFlags; { // Step 7.ii. Let P be the value of pattern’s [[OriginalSource]] internal // slot. RegExpShared* shared = RegExpToShared(cx, patternObj); if (!shared) { return false; } sourceAtom = shared->getSource(); flags = shared->getFlags(); } // Step 9, minus lastIndex zeroing. regexp->initIgnoringLastIndex(sourceAtom, flags); } else { // Step 8. RootedValue P(cx, patternValue); RootedValue F(cx, args.get(1)); // Step 9, minus lastIndex zeroing. if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) { return false; } } // The final niggling bit of step 8. // // |regexp| is user-exposed, so its "lastIndex" property might be // non-writable. if (!SetLastIndex(cx, regexp, 0)) { return false; } args.rval().setObject(*regexp); return true; } static bool regexp_compile(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); if (JS::Prefs::experimental_legacy_regexp() && args.thisv().isObject()) { RootedObject thisObj(cx, &args.thisv().toObject()); JSObject* unwrapped = js::CheckedUnwrapStatic(thisObj); if (unwrapped && unwrapped->is()) { // Step 3. Let thisRealm be the current Realm Record. JS::Realm* thisRealm = cx->realm(); // Step 4. Let oRealm be the value of O’s [[Realm]] internal slot. RegExpObject* regexp = &unwrapped->as(); JS::Realm* oRealm = regexp->realm(); // Step 5. If SameValue(thisRealm, oRealm) is false, throw a TypeError // exception. if (thisRealm != oRealm) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_REGEXP_CROSS_REALM); return false; } // Step 6. If the value of R’s [[LegacyFeaturesEnabled]] internal slot is // false, throw a TypeError exception. 
if (!regexp->legacyFeaturesEnabled()) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_REGEXP_LEGACY_FEATURES_DISABLED); return false; } } } /* Steps 1-2. */ return CallNonGenericMethod(cx, args); } /* * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1. */ bool js::regexp_construct(JSContext* cx, unsigned argc, Value* vp) { AutoJSConstructorProfilerEntry pseudoFrame(cx, "RegExp"); CallArgs args = CallArgsFromVp(argc, vp); RootedObject newTarget(cx); // Steps 1. bool patternIsRegExp; if (!IsRegExp(cx, args.get(0), &patternIsRegExp)) { return false; } if (!args.isConstructing()) { // Step 3.b. if (patternIsRegExp && !args.hasDefined(1)) { RootedObject patternObj(cx, &args[0].toObject()); // Step 3.b.i. RootedValue patternConstructor(cx); if (!GetProperty(cx, patternObj, patternObj, cx->names().constructor, &patternConstructor)) { return false; } // Step 3.b.ii. if (patternConstructor.isObject() && patternConstructor.toObject() == args.callee()) { args.rval().set(args[0]); return true; } } } else { newTarget = &args.newTarget().toObject(); } RootedValue patternValue(cx, args.get(0)); // Step 4. ESClass cls; if (!GetClassOfValue(cx, patternValue, &cls)) { return false; } if (cls == ESClass::RegExp) { // Beware! |patternObj| might be a proxy into another compartment, so // don't assume |patternObj.is()|. RootedObject patternObj(cx, &patternValue.toObject()); Rooted sourceAtom(cx); RegExpFlags flags; RootedRegExpShared shared(cx); { // Step 4.a. shared = RegExpToShared(cx, patternObj); if (!shared) { return false; } sourceAtom = shared->getSource(); // Step 4.b. // Get original flags in all cases, to compare with passed flags. flags = shared->getFlags(); // If the RegExpShared is in another Zone, don't reuse it. if (cx->zone() != shared->zone()) { shared = nullptr; } } // Step 7. 
RootedObject proto(cx); if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) { return false; } Rooted regexp( cx, RegExpAlloc(cx, GenericObject, proto, newTarget)); if (!regexp) { return false; } // Step 8. if (args.hasDefined(1)) { // Step 4.c / 21.2.3.2.2 RegExpInitialize step 4. RegExpFlags flagsArg = RegExpFlag::NoFlags; RootedString flagStr(cx, ToString(cx, args[1])); if (!flagStr) { return false; } if (!ParseRegExpFlags(cx, flagStr, &flagsArg)) { return false; } // Don't reuse the RegExpShared if we have different flags. if (flags != flagsArg) { shared = nullptr; } if ((!flags.unicode() && flagsArg.unicode()) || (!flags.unicodeSets() && flagsArg.unicodeSets())) { // Have to check syntax again when adding 'u' or 'v' flag. // https://tc39.es/ecma262/#sec-regexpinitialize // 22.2.3.3 step 13. shared = CheckPatternSyntax(cx, sourceAtom, flagsArg); if (!shared) { return false; } } flags = flagsArg; } regexp->initAndZeroLastIndex(sourceAtom, flags, cx); if (shared) { regexp->setShared(shared); } args.rval().setObject(*regexp); return true; } RootedValue P(cx); RootedValue F(cx); // Step 5. if (patternIsRegExp) { RootedObject patternObj(cx, &patternValue.toObject()); // Step 5.a. if (!GetProperty(cx, patternObj, patternObj, cx->names().source, &P)) { return false; } // Step 5.b. F = args.get(1); if (F.isUndefined()) { if (!GetProperty(cx, patternObj, patternObj, cx->names().flags, &F)) { return false; } } } else { // Steps 6.a-b. P = patternValue; F = args.get(1); } // Step 7. RootedObject proto(cx); if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) { return false; } Rooted regexp( cx, RegExpAlloc(cx, GenericObject, proto, newTarget)); if (!regexp) { return false; } // Step 8. if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) { return false; } regexp->zeroLastIndex(cx); args.rval().setObject(*regexp); return true; } /* * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1 * steps 4, 7-8. 
*/ bool js::regexp_construct_raw_flags(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 3); MOZ_ASSERT(!args.isConstructing()); // Step 4.a. Rooted sourceAtom(cx, AtomizeString(cx, args[0].toString())); if (!sourceAtom) { return false; } // Step 4.c. uint32_t rawFlags = args[1].toInt32(); JS::RegExpFlags flags = AssertedCast(rawFlags & RegExpFlag::AllFlags); // Self-hosted code can't check prefs efficiently. In some cases it will // call this with the flag set even when the pref is disabled, in which // case we should ignore it. // TODO(bug 2009034): Clean this up when we ship the proposal. bool legacy = args[2].toBoolean() && JS::Prefs::experimental_legacy_regexp(); // Step 7. RegExpObject* regexp = RegExpAlloc(cx, GenericObject); if (!regexp) { return false; } // Step 8. regexp->initAndZeroLastIndex(sourceAtom, flags, cx); regexp->setLegacyFeaturesEnabled(legacy); args.rval().setObject(*regexp); return true; } // This is a specialized implementation of "UnwrapAndTypeCheckThis" for RegExp // getters that need to return a special value for same-realm // %RegExp.prototype%. template static bool RegExpGetter(JSContext* cx, CallArgs& args, const char* methodName, Fn&& fn, HandleValue fallbackValue = UndefinedHandleValue) { JSObject* obj = nullptr; if (args.thisv().isObject()) { obj = &args.thisv().toObject(); if (IsWrapper(obj)) { obj = CheckedUnwrapStatic(obj); if (!obj) { ReportAccessDenied(cx); return false; } } } if (obj) { // Step 4ff if (obj->is()) { return fn(&obj->as()); } // Step 3.a. "If SameValue(R, %RegExp.prototype%) is true, return // undefined." // Or `return "(?:)"` for get RegExp.prototype.source. if (obj == cx->global()->maybeGetRegExpPrototype()) { args.rval().set(fallbackValue); return true; } // fall-through } // Step 2. and Step 3.b. 
JS_ReportErrorNumberLatin1(cx, GetErrorMessage, nullptr, JSMSG_INCOMPATIBLE_REGEXP_GETTER, methodName, InformalValueTypeName(args.thisv())); return false; } bool js::regexp_hasIndices(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return RegExpGetter(cx, args, "hasIndices", [args](RegExpObject* unwrapped) { args.rval().setBoolean(unwrapped->hasIndices()); return true; }); } // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 // 21.2.5.5 get RegExp.prototype.global bool js::regexp_global(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return RegExpGetter(cx, args, "global", [args](RegExpObject* unwrapped) { args.rval().setBoolean(unwrapped->global()); return true; }); } // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 // 21.2.5.6 get RegExp.prototype.ignoreCase bool js::regexp_ignoreCase(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return RegExpGetter(cx, args, "ignoreCase", [args](RegExpObject* unwrapped) { args.rval().setBoolean(unwrapped->ignoreCase()); return true; }); } // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 // 21.2.5.9 get RegExp.prototype.multiline bool js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return RegExpGetter(cx, args, "multiline", [args](RegExpObject* unwrapped) { args.rval().setBoolean(unwrapped->multiline()); return true; }); } // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 // 21.2.5.12 get RegExp.prototype.source static bool regexp_source(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); // Step 3.a. Return "(?:)" for %RegExp.prototype%. 
RootedValue fallback(cx, StringValue(cx->names().emptyRegExp_)); return RegExpGetter( cx, args, "source", [cx, args](RegExpObject* unwrapped) { Rooted src(cx, unwrapped->getSource()); MOZ_ASSERT(src); // Mark potentially cross-zone JSAtom. if (cx->zone() != unwrapped->zone()) { cx->markAtom(src); } // Step 7. JSString* escaped = EscapeRegExpPattern(cx, src); if (!escaped) { return false; } args.rval().setString(escaped); return true; }, fallback); } // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 // 21.2.5.3 get RegExp.prototype.dotAll bool js::regexp_dotAll(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return RegExpGetter(cx, args, "dotAll", [args](RegExpObject* unwrapped) { args.rval().setBoolean(unwrapped->dotAll()); return true; }); } // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 // 21.2.5.14 get RegExp.prototype.sticky bool js::regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return RegExpGetter(cx, args, "sticky", [args](RegExpObject* unwrapped) { args.rval().setBoolean(unwrapped->sticky()); return true; }); } // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161 // 21.2.5.17 get RegExp.prototype.unicode bool js::regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return RegExpGetter(cx, args, "unicode", [args](RegExpObject* unwrapped) { args.rval().setBoolean(unwrapped->unicode()); return true; }); } // https://arai-a.github.io/ecma262-compare/?pr=2418&id=sec-get-regexp.prototype.unicodesets // 21.2.6.19 get RegExp.prototype.unicodeSets bool js::regexp_unicodeSets(JSContext* cx, unsigned argc, JS::Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); return RegExpGetter(cx, args, "unicodeSets", [args](RegExpObject* unwrapped) { args.rval().setBoolean(unwrapped->unicodeSets()); return true; }); } const JSPropertySpec js::regexp_properties[] = { JS_SELF_HOSTED_GET("flags", 
"$RegExpFlagsGetter", 0), JS_INLINABLE_PSG("hasIndices", regexp_hasIndices, 0, RegExpHasIndices), JS_INLINABLE_PSG("global", regexp_global, 0, RegExpGlobal), JS_INLINABLE_PSG("ignoreCase", regexp_ignoreCase, 0, RegExpIgnoreCase), JS_INLINABLE_PSG("multiline", regexp_multiline, 0, RegExpMultiline), JS_INLINABLE_PSG("dotAll", regexp_dotAll, 0, RegExpDotAll), JS_PSG("source", regexp_source, 0), JS_INLINABLE_PSG("sticky", regexp_sticky, 0, RegExpSticky), JS_INLINABLE_PSG("unicode", regexp_unicode, 0, RegExpUnicode), JS_INLINABLE_PSG("unicodeSets", regexp_unicodeSets, 0, RegExpUnicodeSets), JS_PS_END, }; const JSFunctionSpec js::regexp_methods[] = { JS_SELF_HOSTED_FN("toSource", "$RegExpToString", 0, 0), JS_SELF_HOSTED_FN("toString", "$RegExpToString", 0, 0), JS_FN("compile", regexp_compile, 2, 0), JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1, 0), JS_SELF_HOSTED_FN("test", "RegExpTest", 1, 0), JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1, 0), JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0), JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2, 0), JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1, 0), JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2, 0), JS_FS_END, }; static constexpr JS::Latin1Char SHOULD_HEX_ESCAPE = JSString::MAX_LATIN1_CHAR; /** * Ascii escape map. * * 1. If a character is mapped to zero (0x00), then no escape sequence is used. * 2. Else, * a. If a character is mapped to SHOULD_HEX_ESCAPE, then hex-escape. * b. Else, escape with `\` followed by the mapped value. 
*/
// Builds the lookup table, indexed by ASCII code unit, that
// EncodeForRegExpEscape consults. Each entry is one of:
//  - 0 (value-initialized): the character is copied through unescaped;
//  - an ASCII character: a backslash followed by that character is emitted;
//  - SHOULD_HEX_ESCAPE: a hex-escape sequence is emitted for the character.
static constexpr auto AsciiRegExpEscapeMap() {
  std::array result = {};

  // SyntaxCharacter or U+002F (SOLIDUS)
  result['^'] = '^';
  result['$'] = '$';
  result['\\'] = '\\';
  result['.'] = '.';
  result['*'] = '*';
  result['+'] = '+';
  result['?'] = '?';
  result['('] = '(';
  result[')'] = ')';
  result['['] = '[';
  result[']'] = ']';
  result['{'] = '{';
  result['}'] = '}';
  result['|'] = '|';
  result['/'] = '/';

  // ControlEscape Code Point Values
  result['\t'] = 't';
  result['\n'] = 'n';
  result['\v'] = 'v';
  result['\f'] = 'f';
  result['\r'] = 'r';

  // Other punctuators ",-=<>#&!%:;@~'`" or 0x0022 (QUOTATION MARK)
  result[','] = SHOULD_HEX_ESCAPE;
  result['-'] = SHOULD_HEX_ESCAPE;
  result['='] = SHOULD_HEX_ESCAPE;
  result['<'] = SHOULD_HEX_ESCAPE;
  result['>'] = SHOULD_HEX_ESCAPE;
  result['#'] = SHOULD_HEX_ESCAPE;
  result['&'] = SHOULD_HEX_ESCAPE;
  result['!'] = SHOULD_HEX_ESCAPE;
  result['%'] = SHOULD_HEX_ESCAPE;
  result[':'] = SHOULD_HEX_ESCAPE;
  result[';'] = SHOULD_HEX_ESCAPE;
  result['@'] = SHOULD_HEX_ESCAPE;
  result['~'] = SHOULD_HEX_ESCAPE;
  result['\''] = SHOULD_HEX_ESCAPE;
  result['`'] = SHOULD_HEX_ESCAPE;
  result['"'] = SHOULD_HEX_ESCAPE;

  // WhiteSpace or LineTerminator
  result[' '] = SHOULD_HEX_ESCAPE;

  return result;
}

/**
 * EncodeForRegExpEscape ( c )
 *
 * https://tc39.es/proposal-regex-escaping/#sec-encodeforregexpescape
 *
 * Writes the escaped form of |chars| into |sb|, which must be empty on entry.
 * The function makes two passes over |chars|: the first computes the exact
 * output length, the second appends into storage reserved up front, which is
 * why every append in the second pass is infallible.
 *
 * Returns false after reporting an error if the output length overflows, or
 * if inflating/reserving |sb| fails. Returns true with |sb| still empty when
 * |chars| is empty or no character needs escaping; callers use the empty
 * builder as the "return the input unchanged" signal.
 */
template
[[nodiscard]] static bool EncodeForRegExpEscape(
    JSContext* cx, mozilla::Span chars, JSStringBuilder& sb) {
  MOZ_ASSERT(sb.empty());

  const size_t length = chars.size();
  if (length == 0) {
    return true;
  }

  static constexpr auto asciiEscapeMap = AsciiRegExpEscapeMap();

  // Number of characters added when escaping.
  // Each constant is (length of the escape sequence) - (the one character it
  // replaces): "\c" is 2, "\xHH" is 4, "\uHHHH" is 6.
  static constexpr size_t EscapeAddLength = 2 - 1;
  static constexpr size_t HexEscapeAddLength = 4 - 1;
  static constexpr size_t UnicodeEscapeAddLength = 6 - 1;

  // Initial scan to determine if escape sequences are needed and to compute
  // the output length.
  mozilla::CheckedInt outLength = length;

  // Leading Ascii alpha-numeric character is hex-escaped.
  size_t scanStart = 0;
  if (mozilla::IsAsciiAlphanumeric(chars[0])) {
    outLength += HexEscapeAddLength;
    scanStart = 1;
  }

  for (size_t i = scanStart; i < length; i++) {
    CharT ch = chars[i];

    JS::Latin1Char escape = 0;
    if (mozilla::IsAscii(ch)) {
      escape = asciiEscapeMap[ch];
    } else {
      // Surrogate pair.
      // A well-formed pair is copied through as-is; skip its trail unit too.
      if (unicode::IsLeadSurrogate(ch) && i + 1 < length &&
          unicode::IsTrailSurrogate(chars[i + 1])) {
        i += 1;
        continue;
      }

      // WhiteSpace or LineTerminator or unmatched surrogate.
      if (unicode::IsSpace(ch) || unicode::IsSurrogate(ch)) {
        escape = SHOULD_HEX_ESCAPE;
      }
    }
    if (!escape) {
      continue;
    }

    // An ASCII |escape| means "\c"; otherwise the width of the numeric escape
    // depends on whether |ch| fits in Latin-1.
    if (mozilla::IsAscii(escape)) {
      outLength += EscapeAddLength;
    } else if (ch <= JSString::MAX_LATIN1_CHAR) {
      outLength += HexEscapeAddLength;
    } else {
      outLength += UnicodeEscapeAddLength;
    }
  }

  if (!outLength.isValid()) {
    ReportAllocationOverflow(cx);
    return false;
  }

  // Return if no escape sequences are needed.
  if (outLength.value() == length) {
    return true;
  }
  MOZ_ASSERT(outLength.value() > length);

  // Inflating is fallible, so we have to convert to two-byte upfront.
  if constexpr (std::is_same_v) {
    if (!sb.ensureTwoByteChars()) {
      return false;
    }
  }

  // Allocate memory for the output using the final length.
  if (!sb.reserve(outLength.value())) {
    return false;
  }

  // NB: Lower case hex digits.
  static constexpr char HexDigits[] = "0123456789abcdef";
  static_assert(std::char_traits::length(HexDigits) == 16);

  // Append |ch| as an escaped character.
  auto appendEscape = [&](JS::Latin1Char ch) {
    MOZ_ASSERT(mozilla::IsAscii(ch));

    sb.infallibleAppend('\\');
    sb.infallibleAppend(ch);
  };

  // Append |ch| as a hex-escape sequence.
  auto appendHexEscape = [&](CharT ch) {
    MOZ_ASSERT(ch <= JSString::MAX_LATIN1_CHAR);

    sb.infallibleAppend('\\');
    sb.infallibleAppend('x');
    sb.infallibleAppend(HexDigits[(ch >> 4) & 0xf]);
    sb.infallibleAppend(HexDigits[ch & 0xf]);
  };

  // Append |ch| as a Unicode-escape sequence.
  auto appendUnicodeEscape = [&](char16_t ch) {
    MOZ_ASSERT(ch > JSString::MAX_LATIN1_CHAR);

    sb.infallibleAppend('\\');
    sb.infallibleAppend('u');
    sb.infallibleAppend(HexDigits[(ch >> 12) & 0xf]);
    sb.infallibleAppend(HexDigits[(ch >> 8) & 0xf]);
    sb.infallibleAppend(HexDigits[(ch >> 4) & 0xf]);
    sb.infallibleAppend(HexDigits[ch & 0xf]);
  };

  // Index after the last character which produced an escape sequence.
  size_t startUnescaped = 0;

  // Append unescaped characters from |startUnescaped| (inclusive) to |end|
  // (exclusive).
  // Afterwards |startUnescaped| is moved past |end|, i.e. past the character
  // at |end| that the caller is about to emit as an escape sequence.
  auto appendUnescaped = [&](size_t end) {
    MOZ_ASSERT(startUnescaped <= end && end <= length);

    if (startUnescaped < end) {
      auto unescaped = chars.FromTo(startUnescaped, end);
      sb.infallibleAppend(unescaped.data(), unescaped.size());
    }
    startUnescaped = end + 1;
  };

  // Leading Ascii alpha-numeric character is hex-escaped.
  size_t start = 0;
  if (mozilla::IsAsciiAlphanumeric(chars[0])) {
    appendHexEscape(chars[0]);

    start = 1;
    startUnescaped = 1;
  }

  // Second pass: same classification as the first pass, but now emitting.
  for (size_t i = start; i < length; i++) {
    CharT ch = chars[i];

    JS::Latin1Char escape = 0;
    if (mozilla::IsAscii(ch)) {
      escape = asciiEscapeMap[ch];
    } else {
      // Surrogate pair.
      if (unicode::IsLeadSurrogate(ch) && i + 1 < length &&
          unicode::IsTrailSurrogate(chars[i + 1])) {
        i += 1;
        continue;
      }

      // WhiteSpace or LineTerminator or unmatched surrogate.
      if (unicode::IsSpace(ch) || unicode::IsSurrogate(ch)) {
        escape = SHOULD_HEX_ESCAPE;
      }
    }
    if (!escape) {
      continue;
    }

    // Flush the pending run of unescaped characters before the escape.
    appendUnescaped(i);

    if (mozilla::IsAscii(escape)) {
      appendEscape(escape);
    } else if (ch <= JSString::MAX_LATIN1_CHAR) {
      appendHexEscape(ch);
    } else {
      appendUnicodeEscape(ch);
    }
  }

  // Flush whatever unescaped tail follows the last escape sequence.
  if (startUnescaped) {
    appendUnescaped(length);
  }

  MOZ_ASSERT(sb.length() == outLength.value(), "all characters were written");
  return true;
}

// Overload taking a linear string: dispatches to the span-based
// implementation using the string's Latin-1 or two-byte character range.
// |nogc| is held across the call because the spans point at the string's
// characters.
[[nodiscard]] static bool EncodeForRegExpEscape(JSContext* cx,
                                                JSLinearString* string,
                                                JSStringBuilder& sb) {
  JS::AutoCheckCannotGC nogc;
  if (string->hasLatin1Chars()) {
    auto chars = mozilla::Span(string->latin1Range(nogc));
    return EncodeForRegExpEscape(cx, chars, sb);
  }
  auto chars = mozilla::Span(string->twoByteRange(nogc));
  return EncodeForRegExpEscape(cx, chars, sb);
}

/**
 * RegExp.escape ( S )
 *
 * https://tc39.es/proposal-regex-escaping/
 *
 * Native implementation: reports a value error unless the first argument is
 * a string; otherwise returns the escaped string, or the input string itself
 * when nothing had to be escaped.
 */
static bool regexp_escape(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  // Step 1.
  if (!args.get(0).isString()) {
    return ReportValueError(cx, JSMSG_UNEXPECTED_TYPE, JSDVG_SEARCH_STACK,
                            args.get(0), nullptr, "not a string");
  }

  Rooted string(cx, args[0].toString()->ensureLinear(cx));
  if (!string) {
    return false;
  }

  // Step 2-5.
  JSStringBuilder sb(cx);
  if (!EncodeForRegExpEscape(cx, string, sb)) {
    return false;
  }

  // Return the input string if no escape sequences were added.
  if (sb.empty()) {
    args.rval().setString(string);
    return true;
  }

  auto* result = sb.finishString();
  if (!result) {
    return false;
  }

  args.rval().setString(result);
  return true;
}

// Getter body for the numbered capture properties: stores capture |parenNum|
// in the return value, substituting the empty string when the capture is
// undefined. Expects |cx|, |res| and |args| to be in scope at the expansion
// site.
#define STATIC_PAREN_GETTER_CODE(parenNum)                        \
  if (!res->createParen(cx, parenNum, args.rval())) return false; \
  if (args.rval().isUndefined())                                  \
    args.rval().setString(cx->runtime()->emptyString);            \
  return true

/*
 * RegExp static properties.
*
 * RegExp class static properties and their Perl counterparts:
 *
 *  RegExp.input                $_
 *  RegExp.lastMatch            $&
 *  RegExp.lastParen            $+
 *  RegExp.leftContext          $`
 *  RegExp.rightContext         $'
 */

// Receiver/validity checks shared by the static getters. The checks only run
// when the JS::Prefs::experimental_legacy_regexp() pref is on; with the pref
// off this returns true unconditionally.
//
// With the pref on, an error is reported and false is returned when
//  - the |this| value is not the current global's RegExp constructor, or
//  - the global's RegExpStatics report isInvalidated().
//
// |name| is only used in the error messages. The getter macro below passes
// the stringified C++ function name (e.g. "static_input_getter").
static bool checkRegexpLegacyFeatures(JSContext* cx, const CallArgs& args,
                                      const char* name) {
  if (JS::Prefs::experimental_legacy_regexp()) {
    /* Step 1. Assert C is an object that has an internal slot named
     * internalSlotName.*/
    JSObject* regexpCtor =
        GlobalObject::getOrCreateRegExpConstructor(cx, cx->global());
    if (!regexpCtor) return false;

    /* Step 2. If SameValue(C, thisValue) is false, throw TypeError */
    bool same = false;
    if (!args.thisv().isObject() ||
        !SameValue(cx, args.thisv(), ObjectValue(*regexpCtor), &same) ||
        !same) {
      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                JSMSG_INCOMPATIBLE_RECEIVER, name,
                                InformalValueTypeName(args.thisv()));
      return false;
    }

    /* Step 4. If val is empty, throw a TypeError exception */
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
    if (!res) return false;
    if (res->isInvalidated()) {
      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                JSMSG_REGEXP_STATIC_EMPTY, name,
                                InformalValueTypeName(args.thisv()));
      return false;
    }
  }
  return true;
}

// Defines a native getter |name| that fetches the global's RegExpStatics into
// |res|, runs checkRegexpLegacyFeatures, and then executes |code|. |code| is
// responsible for returning from the function.
#define DEFINE_STATIC_GETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    CallArgs args = CallArgsFromVp(argc, vp);                              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) return false;                                                \
    if (!checkRegexpLegacyFeatures(cx, args, #name)) return false;         \
    code;                                                                  \
  }

DEFINE_STATIC_GETTER(static_input_getter,
                     return res->createPendingInput(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastMatch_getter,
                     return res->createLastMatch(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastParen_getter,
                     return res->createLastParen(cx, args.rval()))
DEFINE_STATIC_GETTER(static_leftContext_getter,
                     return res->createLeftContext(cx, args.rval()))
DEFINE_STATIC_GETTER(static_rightContext_getter,
                     return res->createRightContext(cx, args.rval()))
// Getters for the numbered capture properties $1..$9.
DEFINE_STATIC_GETTER(static_paren1_getter, STATIC_PAREN_GETTER_CODE(1))
DEFINE_STATIC_GETTER(static_paren2_getter, STATIC_PAREN_GETTER_CODE(2))
DEFINE_STATIC_GETTER(static_paren3_getter, STATIC_PAREN_GETTER_CODE(3))
DEFINE_STATIC_GETTER(static_paren4_getter, STATIC_PAREN_GETTER_CODE(4))
DEFINE_STATIC_GETTER(static_paren5_getter, STATIC_PAREN_GETTER_CODE(5))
DEFINE_STATIC_GETTER(static_paren6_getter, STATIC_PAREN_GETTER_CODE(6))
DEFINE_STATIC_GETTER(static_paren7_getter, STATIC_PAREN_GETTER_CODE(7))
DEFINE_STATIC_GETTER(static_paren8_getter, STATIC_PAREN_GETTER_CODE(8))
DEFINE_STATIC_GETTER(static_paren9_getter, STATIC_PAREN_GETTER_CODE(9))

// Setter counterpart of DEFINE_STATIC_GETTER: runs |code| after the same
// checks and then returns true. No use of this macro is visible in this part
// of the file; the input setter below is written out by hand.
#define DEFINE_STATIC_SETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    CallArgs args = CallArgsFromVp(argc, vp);                              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) return false;                                                \
    if (!checkRegexpLegacyFeatures(cx, args, #name)) return false;         \
    code;                                                                  \
    return true;                                                           \
  }

// Setter for RegExp.input / RegExp.$_: converts the argument to a string and
// stores it as the pending input of the global's RegExpStatics. When the
// legacy-regexp pref is on, the receiver must be the current global's RegExp
// constructor. Unlike the getters, there is no isInvalidated() check here.
static bool static_input_setter(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (JS::Prefs::experimental_legacy_regexp()) {
    // Step 1. Assert C is an object that has an internal slot named
    // internalSlotName.
    JSObject* regexpCtor =
        GlobalObject::getOrCreateRegExpConstructor(cx, cx->global());
    if (!regexpCtor) {
      return false;
    }

    // Step 2. If SameValue(C, thisValue) is false, throw a TypeError exception.
    bool same = false;
    if (!args.thisv().isObject() ||
        !SameValue(cx, args.thisv(), ObjectValue(*regexpCtor), &same) ||
        !same) {
      // NOTE(review): checkRegexpLegacyFeatures passes two format arguments
      // (a name and the receiver's type name) with this same error number,
      // while only one is passed here. Confirm which arity the message
      // expects.
      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                JSMSG_INCOMPATIBLE_RECEIVER,
                                InformalValueTypeName(args.thisv()));
      return false;
    }
  }

  RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
  if (!res) {
    return false;
  }

  // Step 3. Let strVal be ? ToString(val).
  RootedString str(cx, ToString(cx, args.get(0)));
  if (!str) {
    return false;
  }

  // Step 4. Set the value of the internal slot of C named internalSlotName to
  // strVal.
  res->setPendingInput(str);
  args.rval().setString(str);
  return true;
}

// Static accessor properties installed on the RegExp constructor. The two
// variants list the same properties and differ only in attributes: under
// NIGHTLY_BUILD every legacy property uses flags 0; otherwise the legacy
// properties are JSPROP_PERMANENT, and the long names and $1..$9 are also
// JSPROP_ENUMERATE.
#ifdef NIGHTLY_BUILD
const JSPropertySpec js::regexp_static_props[] = {
    JS_PSGS("input", static_input_getter, static_input_setter, 0),
    JS_PSG("lastMatch", static_lastMatch_getter, 0),
    JS_PSG("lastParen", static_lastParen_getter, 0),
    JS_PSG("leftContext", static_leftContext_getter, 0),
    JS_PSG("rightContext", static_rightContext_getter, 0),
    JS_PSG("$1", static_paren1_getter, 0),
    JS_PSG("$2", static_paren2_getter, 0),
    JS_PSG("$3", static_paren3_getter, 0),
    JS_PSG("$4", static_paren4_getter, 0),
    JS_PSG("$5", static_paren5_getter, 0),
    JS_PSG("$6", static_paren6_getter, 0),
    JS_PSG("$7", static_paren7_getter, 0),
    JS_PSG("$8", static_paren8_getter, 0),
    JS_PSG("$9", static_paren9_getter, 0),
    JS_PSGS("$_", static_input_getter, static_input_setter, 0),
    JS_PSG("$&", static_lastMatch_getter, 0),
    JS_PSG("$+", static_lastParen_getter, 0),
    JS_PSG("$`", static_leftContext_getter, 0),
    JS_PSG("$'", static_rightContext_getter, 0),
    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),
    JS_PS_END,
};
#else
const JSPropertySpec js::regexp_static_props[] = {
    JS_PSGS("input", static_input_getter, static_input_setter,
            JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastMatch", static_lastMatch_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastParen", static_lastParen_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("leftContext", static_leftContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("rightContext", static_rightContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$1", static_paren1_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$2", static_paren2_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$3", static_paren3_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$4", static_paren4_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$5", static_paren5_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$6", static_paren6_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$7", static_paren7_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$8", static_paren8_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$9", static_paren9_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSGS("$_", static_input_getter, static_input_setter, JSPROP_PERMANENT),
    JS_PSG("$&", static_lastMatch_getter, JSPROP_PERMANENT),
    JS_PSG("$+", static_lastParen_getter, JSPROP_PERMANENT),
    JS_PSG("$`", static_leftContext_getter, JSPROP_PERMANENT),
    JS_PSG("$'", static_rightContext_getter, JSPROP_PERMANENT),
    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),
    JS_PS_END,
};
#endif

// Static methods installed on the RegExp constructor.
const JSFunctionSpec js::regexp_static_methods[] = {
    JS_FN("escape", regexp_escape, 1, 0),
    JS_FS_END,
};

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
 *
 * Runs |regexp| against |string| starting at |lastIndex| and fills |matches|.
 * Returns the status from ExecuteRegExpImpl, or RegExpRunStatus::Error when
 * obtaining the shared regexp data, the RegExpStatics, or a linear input
 * string fails.
 */
static RegExpRunStatus ExecuteRegExp(JSContext* cx, HandleObject regexp,
                                     HandleString string, int32_t lastIndex,
                                     VectorMatchPairs* matches) {
  /*
   * WARNING: Despite the presence of spec step comment numbers, this
   *          algorithm isn't consistent with any ES6 version, draft or
   *          otherwise.  YOU HAVE BEEN WARNED.
   */

  /* Steps 1-2 performed by the caller. */
  Handle reobj = regexp.as();

  RootedRegExpShared re(cx, RegExpObject::getShared(cx, reobj));
  if (!re) {
    return RegExpRunStatus::Error;
  }

  RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
  if (!res) {
    return RegExpRunStatus::Error;
  }

  Rooted input(cx, string->ensureLinear(cx));
  if (!input) {
    return RegExpRunStatus::Error;
  }

  /* Handled by caller */
  MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());

  /* Steps 4-8 performed by the caller. */

  /* Steps 3, 10-14, except 12.a.i, 12.c.i.1. */
  RegExpRunStatus status =
      ExecuteRegExpImpl(cx, res, &re, input, lastIndex, matches, reobj);
  if (status == RegExpRunStatus::Error) {
    return RegExpRunStatus::Error;
  }

  /* Steps 12.a.i, 12.c.i.i, 15 are done by Self-hosted function. */
  return status;
}

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
 *
 * Executes the regexp and stores in |rval| either null (no match) or the
 * match result built by CreateRegExpMatchResult.
 */
static bool RegExpMatcherImpl(JSContext* cx, HandleObject regexp,
                              HandleString string, int32_t lastIndex,
                              MutableHandleValue rval) {
  /* Execute regular expression and gather matches. */
  VectorMatchPairs matches;

  /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
  RegExpRunStatus status =
      ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
  if (status == RegExpRunStatus::Error) {
    return false;
  }

  /* Steps 12.a, 12.c. */
  if (status == RegExpRunStatus::Success_NotFound) {
    rval.setNull();
    return true;
  }

  /* Steps 16-25 */
  RootedRegExpShared shared(cx, regexp->as().getShared());
  return CreateRegExpMatchResult(cx, shared, string, matches, rval);
}

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
 *
 * Native entry point taking (regexp, string, lastIndex). The argument types
 * are only asserted, not validated.
 */
bool js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 3);
  MOZ_ASSERT(IsRegExpObject(args[0]));
  MOZ_ASSERT(args[1].isString());
  MOZ_ASSERT(args[2].isNumber());

  RootedObject regexp(cx, &args[0].toObject());
  RootedString string(cx, args[1].toString());

  int32_t lastIndex;
  MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));

  /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
  return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
}

/*
 * Separate interface for use by the JITs.
 * This code cannot re-enter JIT code.
*/
bool js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp,
                          HandleString input, int32_t lastIndex,
                          MatchPairs* maybeMatches, MutableHandleValue output) {
  MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());

  // RegExp execution was successful only if the pairs have actually been
  // filled in. Note that IC code always passes a nullptr maybeMatches.
  if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
    // The caller already ran the regexp; only the result object is built here.
    RootedRegExpShared shared(cx, regexp->as().getShared());
    return CreateRegExpMatchResult(cx, shared, input, *maybeMatches, output);
  }
  // Otherwise execute the regexp from scratch.
  return RegExpMatcherImpl(cx, regexp, input, lastIndex, output);
}

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
 * This code is inlined in CodeGenerator.cpp generateRegExpSearcherStub,
 * changes to this code need to get reflected in there too.
 *
 * On success |*result| is -1 when there is no match, otherwise the value
 * returned by CreateRegExpSearchResult for the match pairs.
 */
static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
                               HandleString string, int32_t lastIndex,
                               int32_t* result) {
  /* Execute regular expression and gather matches. */
  VectorMatchPairs matches;

#ifdef DEBUG
  // Ensure we assert if RegExpSearcherLastLimit is called when there's no
  // match.
  cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel;
#endif

  /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
  RegExpRunStatus status =
      ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
  if (status == RegExpRunStatus::Error) {
    return false;
  }

  /* Steps 12.a, 12.c. */
  if (status == RegExpRunStatus::Success_NotFound) {
    *result = -1;
    return true;
  }

  /* Steps 16-25 */
  *result = CreateRegExpSearchResult(cx, matches);
  return true;
}

/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
*/ bool js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 3); MOZ_ASSERT(IsRegExpObject(args[0])); MOZ_ASSERT(args[1].isString()); MOZ_ASSERT(args[2].isNumber()); RootedObject regexp(cx, &args[0].toObject()); RootedString string(cx, args[1].toString()); int32_t lastIndex; MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex)); /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */ int32_t result = 0; if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result)) { return false; } args.rval().setInt32(result); return true; } /* * Separate interface for use by the JITs. * This code cannot re-enter JIT code. */ bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp, HandleString input, int32_t lastIndex, MatchPairs* maybeMatches, int32_t* result) { MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length()); // RegExp execution was successful only if the pairs have actually been // filled in. Note that IC code always passes a nullptr maybeMatches. if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) { *result = CreateRegExpSearchResult(cx, *maybeMatches); return true; } return RegExpSearcherImpl(cx, regexp, input, lastIndex, result); } bool js::RegExpSearcherLastLimit(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 1); MOZ_ASSERT(args[0].isString()); // Assert the limit is not the sentinel value and is valid for this string. MOZ_ASSERT(cx->regExpSearcherLastLimit != RegExpSearcherLastLimitSentinel); MOZ_ASSERT(cx->regExpSearcherLastLimit <= args[0].toString()->length()); args.rval().setInt32(cx->regExpSearcherLastLimit); #ifdef DEBUG // Ensure we assert if this function is called again without a new call to // RegExpSearcher. 
cx->regExpSearcherLastLimit = RegExpSearcherLastLimitSentinel; #endif return true; } template static bool RegExpBuiltinExecMatchRaw(JSContext* cx, Handle regexp, HandleString input, int32_t lastIndex, MatchPairs* maybeMatches, MutableHandleValue output) { MOZ_ASSERT(lastIndex >= 0); MOZ_ASSERT(size_t(lastIndex) <= input->length()); MOZ_ASSERT_IF(!CalledFromJit, !maybeMatches); // RegExp execution was successful only if the pairs have actually been // filled in. Note that IC code always passes a nullptr maybeMatches. int32_t lastIndexNew = 0; if (CalledFromJit && maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) { RootedRegExpShared shared(cx, regexp->as().getShared()); if (!CreateRegExpMatchResult(cx, shared, input, *maybeMatches, output)) { return false; } lastIndexNew = (*maybeMatches)[0].limit; } else { VectorMatchPairs matches; RegExpRunStatus status = ExecuteRegExp(cx, regexp, input, lastIndex, &matches); if (status == RegExpRunStatus::Error) { return false; } if (status == RegExpRunStatus::Success_NotFound) { output.setNull(); lastIndexNew = 0; } else { RootedRegExpShared shared(cx, regexp->as().getShared()); if (!CreateRegExpMatchResult(cx, shared, input, matches, output)) { return false; } lastIndexNew = matches[0].limit; } } RegExpFlags flags = regexp->getFlags(); if (!flags.global() && !flags.sticky()) { return true; } return SetLastIndex(cx, regexp, lastIndexNew); } bool js::RegExpBuiltinExecMatchFromJit(JSContext* cx, Handle regexp, HandleString input, MatchPairs* maybeMatches, MutableHandleValue output) { int32_t lastIndex = 0; if (regexp->isGlobalOrSticky()) { lastIndex = regexp->getLastIndex().toInt32(); MOZ_ASSERT(lastIndex >= 0); if (size_t(lastIndex) > input->length()) { output.setNull(); return SetLastIndex(cx, regexp, 0); } } return RegExpBuiltinExecMatchRaw(cx, regexp, input, lastIndex, maybeMatches, output); } template static bool RegExpBuiltinExecTestRaw(JSContext* cx, Handle regexp, HandleString input, int32_t lastIndex, 
bool* result) { MOZ_ASSERT(lastIndex >= 0); MOZ_ASSERT(size_t(lastIndex) <= input->length()); VectorMatchPairs matches; RegExpRunStatus status = ExecuteRegExp(cx, regexp, input, lastIndex, &matches); if (status == RegExpRunStatus::Error) { return false; } *result = (status == RegExpRunStatus::Success); RegExpFlags flags = regexp->getFlags(); if (!flags.global() && !flags.sticky()) { return true; } int32_t lastIndexNew = *result ? matches[0].limit : 0; return SetLastIndex(cx, regexp, lastIndexNew); } bool js::RegExpBuiltinExecTestFromJit(JSContext* cx, Handle regexp, HandleString input, bool* result) { int32_t lastIndex = 0; if (regexp->isGlobalOrSticky()) { lastIndex = regexp->getLastIndex().toInt32(); MOZ_ASSERT(lastIndex >= 0); if (size_t(lastIndex) > input->length()) { *result = false; return SetLastIndex(cx, regexp, 0); } } return RegExpBuiltinExecTestRaw(cx, regexp, input, lastIndex, result); } using CapturesVector = GCVector; struct JSSubString { JSLinearString* base = nullptr; size_t offset = 0; size_t length = 0; JSSubString() = default; void initEmpty(JSLinearString* base) { this->base = base; offset = length = 0; } void init(JSLinearString* base, size_t offset, size_t length) { this->base = base; this->offset = offset; this->length = length; } }; static void GetParen(JSLinearString* matched, const JS::Value& capture, JSSubString* out) { if (capture.isUndefined()) { out->initEmpty(matched); return; } JSLinearString& captureLinear = capture.toString()->asLinear(); out->init(&captureLinear, 0, captureLinear.length()); } template static bool InterpretDollar(JSLinearString* matched, JSLinearString* string, size_t position, size_t tailPos, Handle captures, Handle namedCaptures, JSLinearString* replacement, const CharT* replacementBegin, const CharT* currentDollar, const CharT* replacementEnd, JSSubString* out, size_t* skip, uint32_t* currentNamedCapture) { MOZ_ASSERT(*currentDollar == '$'); /* If there is only a dollar, bail now. 
*/ if (currentDollar + 1 >= replacementEnd) { return false; } // ES 2021 Table 57: Replacement Text Symbol Substitutions // https://tc39.es/ecma262/#table-replacement-text-symbol-substitutions char16_t c = currentDollar[1]; if (IsAsciiDigit(c)) { /* $n, $nn */ unsigned num = AsciiDigitToNumber(c); if (num > captures.length()) { // The result is implementation-defined. Do not substitute. return false; } const CharT* currentChar = currentDollar + 2; if (currentChar < replacementEnd) { c = *currentChar; if (IsAsciiDigit(c)) { unsigned tmpNum = 10 * num + AsciiDigitToNumber(c); // If num > captures.length(), the result is implementation-defined. // Consume next character only if num <= captures.length(). if (tmpNum <= captures.length()) { currentChar++; num = tmpNum; } } } if (num == 0) { // The result is implementation-defined. Do not substitute. return false; } *skip = currentChar - currentDollar; MOZ_ASSERT(num <= captures.length()); GetParen(matched, captures[num - 1], out); return true; } // '$<': Named Captures if (c == '<') { // Step 1. if (namedCaptures.length() == 0) { return false; } // Step 2.b const CharT* nameStart = currentDollar + 2; const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd); // Step 2.c if (!nameEnd) { return false; } // Step 2.d // We precompute named capture replacements in InitNamedCaptures. // They are stored in the order in which we will need them, so here // we can just take the next one in the list. 
size_t nameLength = nameEnd - nameStart; *skip = nameLength + 3; // $<...> // Steps 2.d.iii-iv GetParen(matched, namedCaptures[*currentNamedCapture], out); *currentNamedCapture += 1; return true; } switch (c) { default: return false; case '$': out->init(replacement, currentDollar - replacementBegin, 1); break; case '&': out->init(matched, 0, matched->length()); break; case '`': out->init(string, 0, position); break; case '\'': if (tailPos >= string->length()) { out->initEmpty(matched); } else { out->init(string, tailPos, string->length() - tailPos); } break; } *skip = 2; return true; } template static bool FindReplaceLengthString(JSContext* cx, Handle matched, Handle string, size_t position, size_t tailPos, Handle captures, Handle namedCaptures, Handle replacement, size_t firstDollarIndex, size_t* sizep) { CheckedInt replen = replacement->length(); JS::AutoCheckCannotGC nogc; MOZ_ASSERT(firstDollarIndex < replacement->length()); const CharT* replacementBegin = replacement->chars(nogc); const CharT* currentDollar = replacementBegin + firstDollarIndex; const CharT* replacementEnd = replacementBegin + replacement->length(); uint32_t currentNamedCapture = 0; do { JSSubString sub; size_t skip; if (InterpretDollar(matched, string, position, tailPos, captures, namedCaptures, replacement, replacementBegin, currentDollar, replacementEnd, &sub, &skip, ¤tNamedCapture)) { if (sub.length > skip) { replen += sub.length - skip; } else { replen -= skip - sub.length; } currentDollar += skip; } else { currentDollar++; } currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd); } while (currentDollar); if (!replen.isValid()) { ReportAllocationOverflow(cx); return false; } *sizep = replen.value(); return true; } static bool FindReplaceLength(JSContext* cx, Handle matched, Handle string, size_t position, size_t tailPos, Handle captures, Handle namedCaptures, Handle replacement, size_t firstDollarIndex, size_t* sizep) { return replacement->hasLatin1Chars() ? 
FindReplaceLengthString( cx, matched, string, position, tailPos, captures, namedCaptures, replacement, firstDollarIndex, sizep) : FindReplaceLengthString( cx, matched, string, position, tailPos, captures, namedCaptures, replacement, firstDollarIndex, sizep); } /* * Precondition: |sb| already has necessary growth space reserved (as * derived from FindReplaceLength), and has been inflated to TwoByte if * necessary. */ template static void DoReplace(Handle matched, Handle string, size_t position, size_t tailPos, Handle captures, Handle namedCaptures, Handle replacement, size_t firstDollarIndex, StringBuilder& sb) { JS::AutoCheckCannotGC nogc; const CharT* replacementBegin = replacement->chars(nogc); const CharT* currentChar = replacementBegin; MOZ_ASSERT(firstDollarIndex < replacement->length()); const CharT* currentDollar = replacementBegin + firstDollarIndex; const CharT* replacementEnd = replacementBegin + replacement->length(); uint32_t currentNamedCapture = 0; do { /* Move one of the constant portions of the replacement value. */ size_t len = currentDollar - currentChar; sb.infallibleAppend(currentChar, len); currentChar = currentDollar; JSSubString sub; size_t skip; if (InterpretDollar(matched, string, position, tailPos, captures, namedCaptures, replacement, replacementBegin, currentDollar, replacementEnd, &sub, &skip, ¤tNamedCapture)) { sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length); currentChar += skip; currentDollar += skip; } else { currentDollar++; } currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd); } while (currentDollar); sb.infallibleAppend(currentChar, replacement->length() - (currentChar - replacementBegin)); } /* * This function finds the list of named captures of the form * "$" in a replacement string and converts them into jsids, for * use in InitNamedReplacements. 
*/
template
static bool CollectNames(JSContext* cx, Handle replacement,
                         size_t firstDollarIndex,
                         MutableHandle> names) {
  JS::AutoCheckCannotGC nogc;
  MOZ_ASSERT(firstDollarIndex < replacement->length());

  const CharT* replacementBegin = replacement->chars(nogc);
  const CharT* currentDollar = replacementBegin + firstDollarIndex;
  const CharT* replacementEnd = replacementBegin + replacement->length();

  // https://tc39.es/ecma262/#table-45, "$<" section
  while (currentDollar && currentDollar + 1 < replacementEnd) {
    if (currentDollar[1] == '<') {
      // Step 2.b
      const CharT* nameStart = currentDollar + 2;
      const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);

      // Step 2.c
      // An unterminated "$<" ends the scan; names collected so far are kept.
      if (!nameEnd) {
        return true;
      }

      // Step 2.d.i
      size_t nameLength = nameEnd - nameStart;
      JSAtom* atom = AtomizeChars(cx, nameStart, nameLength);
      if (!atom || !names.append(AtomToId(atom))) {
        return false;
      }
      currentDollar = nameEnd + 1;
    } else {
      // Skip the '$' together with the character after it, so that the
      // second '$' of "$$" is not taken as the start of a new pattern.
      currentDollar += 2;
    }
    currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
  }
  return true;
}

/*
 * When replacing named captures, the spec requires us to perform
 * `Get(match.groups, name)` for each "$<name>". These `Get`s can be
 * script-visible; for example, RegExp can be extended with an `exec`
 * method that wraps `groups` in a proxy. To make sure that we do the
 * right thing, if a regexp has named captures, we find the named
 * capture replacements before beginning the actual replacement.
 * This guarantees that we will call GetProperty once and only once for
 * each "$<name>" in the replacement string, in the correct order.
 *
 * This function precomputes the results of step 2 of the '$<' case
 * here: https://tc39.es/proposal-regexp-named-groups/#table-45, so
 * that when we need to access the nth named capture in InterpretDollar,
 * we can just use the nth value stored in namedCaptures.
*/
static bool InitNamedCaptures(JSContext* cx, Handle replacement,
                              HandleObject groups, size_t firstDollarIndex,
                              MutableHandle namedCaptures) {
  Rooted> names(cx, cx);
  if (replacement->hasLatin1Chars()) {
    if (!CollectNames(cx, replacement, firstDollarIndex, &names)) {
      return false;
    }
  } else {
    if (!CollectNames(cx, replacement, firstDollarIndex, &names)) {
      return false;
    }
  }

  // https://tc39.es/ecma262/#table-45, "$<" section
  RootedId id(cx);
  RootedValue capture(cx);
  for (uint32_t i = 0; i < names.length(); i++) {
    // Step 2.d.i
    id = names[i];

    // Step 2.d.ii
    if (!GetProperty(cx, groups, groups, id, &capture)) {
      return false;
    }

    // Step 2.d.iii
    // Undefined is stored as-is; GetParen later maps it to an empty string.
    if (capture.isUndefined()) {
      if (!namedCaptures.append(capture)) {
        return false;
      }
    } else {
      // Step 2.d.iv
      // Store a linear string, since GetParen calls asLinear() on it.
      JSString* str = ToString(cx, capture);
      if (!str) {
        return false;
      }
      JSLinearString* linear = str->ensureLinear(cx);
      if (!linear) {
        return false;
      }
      if (!namedCaptures.append(StringValue(linear))) {
        return false;
      }
    }
  }

  return true;
}

// Returns true if any string that can contribute to the replacement result
// (the input, the replacement, the matched text, or a defined capture) has
// two-byte characters.
static bool NeedTwoBytes(Handle string, Handle replacement, Handle matched,
                         Handle captures, Handle namedCaptures) {
  if (string->hasTwoByteChars()) {
    return true;
  }
  if (replacement->hasTwoByteChars()) {
    return true;
  }
  if (matched->hasTwoByteChars()) {
    return true;
  }

  for (const Value& capture : captures) {
    if (capture.isUndefined()) {
      continue;
    }
    if (capture.toString()->hasTwoByteChars()) {
      return true;
    }
  }

  for (const Value& capture : namedCaptures) {
    if (capture.isUndefined()) {
      continue;
    }
    if (capture.toString()->hasTwoByteChars()) {
      return true;
    }
  }

  return false;
}

// ES2024 draft rev d4927f9bc3706484c75dfef4bbcf5ba826d2632e
//
// 22.2.7.2 RegExpBuiltinExec ( R, S )
// https://tc39.es/ecma262/#sec-regexpbuiltinexec
//
// If `forTest` is true, this is called from `RegExp.prototype.test` and we can
// avoid allocating a result object.
//
// On success |rval| holds a boolean when `forTest` is true, and otherwise the
// match result or null.
bool js::RegExpBuiltinExec(JSContext* cx, Handle regexp, Handle string,
                           bool forTest, MutableHandle rval) {
  // Step 2.
  // An int32 lastIndex is clamped at 0 directly; any other value goes
  // through ToLength.
  uint64_t lastIndex;
  if (MOZ_LIKELY(regexp->getLastIndex().isInt32())) {
    lastIndex = std::max(regexp->getLastIndex().toInt32(), 0);
  } else {
    Rooted lastIndexVal(cx, regexp->getLastIndex());
    if (!ToLength(cx, lastIndexVal, &lastIndex)) {
      return false;
    }
  }

  // Steps 3-5.
  bool globalOrSticky = regexp->isGlobalOrSticky();

  // Step 7.
  if (!globalOrSticky) {
    lastIndex = 0;
  } else {
    // Steps 1, 13.a.
    if (lastIndex > string->length()) {
      if (!SetLastIndex(cx, regexp, 0)) {
        return false;
      }
      rval.set(forTest ? BooleanValue(false) : NullValue());
      return true;
    }
  }

  MOZ_ASSERT(lastIndex <= string->length());
  static_assert(JSString::MAX_LENGTH <= INT32_MAX, "lastIndex fits in int32_t");

  // Steps 6, 8-35.

  // |res| is not used further in this function; only the failure of the call
  // is checked here.
  RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
  if (!res) {
    return false;
  }

  if (forTest) {
    bool result;
    if (!RegExpBuiltinExecTestRaw(cx, regexp, string, int32_t(lastIndex),
                                  &result)) {
      return false;
    }

    rval.setBoolean(result);
    return true;
  }

  return RegExpBuiltinExecMatchRaw(cx, regexp, string, int32_t(lastIndex),
                                   nullptr, rval);
}

// Returns true if |obj| has the global's default RegExp shape and the realm's
// optimizeRegExpPrototypeFuse is still intact.
bool js::IsOptimizableRegExpObject(JSObject* obj, JSContext* cx) {
  // Check the shape to ensure this is a plain RegExpObject with this realm's
  // RegExp.prototype as prototype and without any extra own properties.
  // The fuse check ensures RegExp.prototype is optimizable.
  bool optimizable =
      obj->shape() == cx->global()->maybeRegExpShapeWithDefaultProto() &&
      cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact();
  MOZ_ASSERT_IF(optimizable,
                obj->is() && obj->as().realm() == cx->realm());
  return optimizable;
}

// ES2024 draft rev d4927f9bc3706484c75dfef4bbcf5ba826d2632e
//
// 22.2.7.1 RegExpExec ( R, S )
// https://tc39.es/ecma262/#sec-regexpexec
//
// If `forTest` is true, this is called from `RegExp.prototype.test` and we can
// avoid allocating a result object.
bool js::RegExpExec(JSContext* cx, Handle regexp, Handle string, bool forTest, MutableHandle rval) { // Fast path for the case where `regexp` is a regular expression object with // the builtin `RegExp.prototype.exec` function. if (MOZ_LIKELY(IsOptimizableRegExpObject(regexp, cx))) { return RegExpBuiltinExec(cx, regexp.as(), string, forTest, rval); } // Step 1. Rooted exec(cx); Rooted execKey(cx, NameToId(cx->names().exec)); if (!GetProperty(cx, regexp, regexp, execKey, &exec)) { return false; } // Step 2. // If exec is the original RegExp.prototype.exec, use the same, faster, // path as for the case where exec isn't callable. PropertyName* execName = cx->names().RegExp_prototype_Exec; if (IsSelfHostedFunctionWithName(exec, execName) || !IsCallable(exec)) { // Steps 3-4. if (MOZ_LIKELY(regexp->is())) { return RegExpBuiltinExec(cx, regexp.as(), string, forTest, rval); } // Throw an exception if it's not a wrapped RegExpObject that we can safely // unwrap. if (!regexp->canUnwrapAs()) { Rooted thisv(cx, ObjectValue(*regexp)); return ReportIncompatibleSelfHostedMethod( cx, thisv, IncompatibleContext::RegExpExec); } // Call RegExpBuiltinExec in the regular expression's realm. Rooted unwrapped(cx, ®exp->unwrapAs()); { AutoRealm ar(cx, unwrapped); Rooted wrappedString(cx, string); if (!cx->compartment()->wrap(cx, &wrappedString)) { return false; } if (!RegExpBuiltinExec(cx, unwrapped, wrappedString, forTest, rval)) { return false; } } return cx->compartment()->wrap(cx, rval); } // Step 2.a. Rooted thisv(cx, ObjectValue(*regexp)); FixedInvokeArgs<1> args(cx); args[0].setString(string); if (!js::Call(cx, exec, thisv, args, rval, CallReason::CallContent)) { return false; } // Step 2.b. if (!rval.isObjectOrNull()) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_EXEC_NOT_OBJORNULL); return false; } // Step 2.c. 
if (forTest) { rval.setBoolean(rval.isObject()); } return true; } bool js::RegExpHasCaptureGroups(JSContext* cx, Handle obj, Handle input, bool* result) { // pairCount is only available for compiled regular expressions. if (!obj->hasShared() || obj->getShared()->kind() == RegExpShared::Kind::Unparsed) { Rooted shared(cx, RegExpObject::getShared(cx, obj)); if (!shared) { return false; } Rooted inputLinear(cx, input->ensureLinear(cx)); if (!inputLinear) { return false; } if (!RegExpShared::compileIfNecessary(cx, &shared, inputLinear, RegExpShared::CodeKind::Any)) { return false; } } MOZ_ASSERT(obj->getShared()->pairCount() >= 1); *result = obj->getShared()->pairCount() > 1; return true; } /* ES 2021 21.1.3.17.1 */ // https://tc39.es/ecma262/#sec-getsubstitution bool js::RegExpGetSubstitution(JSContext* cx, Handle matchResult, Handle string, size_t position, Handle replacement, size_t firstDollarIndex, HandleValue groups, MutableHandleValue rval) { MOZ_ASSERT(firstDollarIndex < replacement->length()); // Step 1 (skipped). // Step 10 (reordered). uint32_t matchResultLength = matchResult->length(); MOZ_ASSERT(matchResultLength > 0); MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength()); const Value& matchedValue = matchResult->getDenseElement(0); Rooted matched(cx, matchedValue.toString()->ensureLinear(cx)); if (!matched) { return false; } // Step 2. size_t matchLength = matched->length(); // Steps 3-5 (skipped). // Step 6. MOZ_ASSERT(position <= string->length()); uint32_t nCaptures = matchResultLength - 1; Rooted captures(cx, CapturesVector(cx)); if (!captures.reserve(nCaptures)) { return false; } // Step 7. 
for (uint32_t i = 1; i <= nCaptures; i++) { const Value& capture = matchResult->getDenseElement(i); if (capture.isUndefined()) { captures.infallibleAppend(capture); continue; } JSLinearString* captureLinear = capture.toString()->ensureLinear(cx); if (!captureLinear) { return false; } captures.infallibleAppend(StringValue(captureLinear)); } Rooted namedCaptures(cx, cx); if (groups.isObject()) { RootedObject groupsObj(cx, &groups.toObject()); if (!InitNamedCaptures(cx, replacement, groupsObj, firstDollarIndex, &namedCaptures)) { return false; } } else { MOZ_ASSERT(groups.isUndefined()); } // Step 8 (skipped). // Step 9. CheckedInt checkedTailPos(0); checkedTailPos += position; checkedTailPos += matchLength; if (!checkedTailPos.isValid()) { ReportAllocationOverflow(cx); return false; } uint32_t tailPos = checkedTailPos.value(); // Step 11. size_t reserveLength; if (!FindReplaceLength(cx, matched, string, position, tailPos, captures, namedCaptures, replacement, firstDollarIndex, &reserveLength)) { return false; } JSStringBuilder result(cx); if (NeedTwoBytes(string, replacement, matched, captures, namedCaptures)) { if (!result.ensureTwoByteChars()) { return false; } } if (!result.reserve(reserveLength)) { return false; } if (replacement->hasLatin1Chars()) { DoReplace(matched, string, position, tailPos, captures, namedCaptures, replacement, firstDollarIndex, result); } else { DoReplace(matched, string, position, tailPos, captures, namedCaptures, replacement, firstDollarIndex, result); } // Step 12. JSString* resultString = result.finishString(); if (!resultString) { return false; } rval.setString(resultString); return true; } bool js::GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 1); JSString* str = args[0].toString(); // Should be handled in different path. 
MOZ_ASSERT(str->length() != 0); int32_t index = -1; if (!GetFirstDollarIndexRaw(cx, str, &index)) { return false; } args.rval().setInt32(index); return true; } template static MOZ_ALWAYS_INLINE int GetFirstDollarIndexImpl(const TextChar* text, uint32_t textLen) { const TextChar* end = text + textLen; for (const TextChar* c = text; c != end; ++c) { if (*c == '$') { return c - text; } } return -1; } template int32_t js::GetFirstDollarIndexRawFlat(const StringT* text) { uint32_t len = text->length(); JS::AutoCheckCannotGC nogc; if (text->hasLatin1Chars()) { return GetFirstDollarIndexImpl(text->latin1Chars(nogc), len); } return GetFirstDollarIndexImpl(text->twoByteChars(nogc), len); } template int32_t js::GetFirstDollarIndexRawFlat( const JSLinearString* text); template int32_t js::GetFirstDollarIndexRawFlat( const JSOffThreadAtom* text); bool js::GetFirstDollarIndexRaw(JSContext* cx, JSString* str, int32_t* index) { JSLinearString* text = str->ensureLinear(cx); if (!text) { return false; } *index = GetFirstDollarIndexRawFlat(text); return true; } bool js::IsRegExpPrototypeOptimizable(JSContext* cx, unsigned argc, Value* vp) { // This can only be called from self-hosted code. CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 0); bool optimizable = cx->realm()->realmFuses.optimizeRegExpPrototypeFuse.intact(); args.rval().setBoolean(optimizable); return true; } bool js::IsOptimizableRegExpObject(JSContext* cx, unsigned argc, Value* vp) { // This can only be called from self-hosted code. CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 1); MOZ_ASSERT(args[0].isObject()); JSObject* obj = &args[0].toObject(); bool optimizable = IsOptimizableRegExpObject(obj, cx); args.rval().setBoolean(optimizable); return true; } /* * Pattern match the script to check if it is is indexing into a particular * object, e.g. 'function(a) { return b[a]; }'. 
Avoid calling the script in * such cases, which are used by javascript packers (particularly the popular * Dean Edwards packer) to efficiently encode large scripts. We only handle the * code patterns generated by such packers here. */ bool js::intrinsic_GetElemBaseForLambda(JSContext* cx, unsigned argc, Value* vp) { // This can only be called from self-hosted code. CallArgs args = CallArgsFromVp(argc, vp); MOZ_ASSERT(args.length() == 1); JSObject& lambda = args[0].toObject(); args.rval().setUndefined(); if (!lambda.is()) { return true; } RootedFunction fun(cx, &lambda.as()); if (!fun->isInterpreted() || fun->isClassConstructor()) { return true; } JSScript* script = JSFunction::getOrCreateScript(cx, fun); if (!script) { return false; } jsbytecode* pc = script->code(); /* * JSOp::GetAliasedVar tells us exactly where to find the base object 'b'. * Rule out the (unlikely) possibility of a function with environment * objects since it would make our environment walk off. */ if (JSOp(*pc) != JSOp::GetAliasedVar || fun->needsSomeEnvironmentObject()) { return true; } EnvironmentCoordinate ec(pc); EnvironmentObject* env = &fun->environment()->as(); for (unsigned i = 0; i < ec.hops(); ++i) { env = &env->enclosingEnvironment().as(); } Value b = env->aliasedBinding(ec); pc += JSOpLength_GetAliasedVar; /* Look for 'a' to be the lambda's first argument. */ if (JSOp(*pc) != JSOp::GetArg || GET_ARGNO(pc) != 0) { return true; } pc += JSOpLength_GetArg; /* 'b[a]' */ if (JSOp(*pc) != JSOp::GetElem) { return true; } pc += JSOpLength_GetElem; /* 'return b[a]' */ if (JSOp(*pc) != JSOp::Return) { return true; } /* 'b' must behave like a normal object. */ if (!b.isObject()) { return true; } JSObject& bobj = b.toObject(); const JSClass* clasp = bobj.getClass(); if (!clasp->isNativeObject() || clasp->getOpsLookupProperty() || clasp->getOpsGetProperty()) { return true; } args.rval().setObject(bobj); return true; } /* * Emulates `b[a]` property access, that is detected in GetElemBaseForLambda. 
* It returns the property value only if the property is data property and the
 * property value is a string. Otherwise it returns undefined.
 *
 * args[0] is the object, args[1] the property name as a string. Returns false
 * only if atomizing the name fails.
 */
bool js::intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc,
                                         Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 2);

  JSObject* obj = &args[0].toObject();
  if (!obj->is<NativeObject>()) {
    // The object is already checked to be native in GetElemBaseForLambda,
    // but it can be swapped to another class that is non-native.
    // Return undefined to mark failure to get the property.
    args.rval().setUndefined();
    return true;
  }

  // No need to root |obj| because |AtomizeString| can't GC.
  JS::AutoCheckCannotGC nogc;

  JSAtom* atom = AtomizeString(cx, args[1].toString());
  if (!atom) {
    return false;
  }

  // A failed pure lookup and a non-string value both yield undefined.
  Value v;
  if (GetPropertyPure(cx, obj, AtomToId(atom), &v) && v.isString()) {
    args.rval().set(v);
  } else {
    args.rval().setUndefined();
  }

  return true;
}