// Copyright 2025 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_REGEXP_REGEXP_BYTECODES_INL_H_ #define V8_REGEXP_REGEXP_BYTECODES_INL_H_ #include "irregexp/imported/regexp-bytecodes.h" // Include the non-inl header before the rest of the headers. #include #include namespace v8 { namespace internal { template struct RegExpOperandTypeTraits; #define DECLARE_BASIC_OPERAND_TYPE_TRAITS(Name, CType) \ template <> \ struct RegExpOperandTypeTraits { \ static_assert(!std::is_pointer_v); \ static constexpr uint8_t kSize = sizeof(CType); \ using kCType = CType; \ static constexpr bool kIsBasic = true; \ }; BASIC_BYTECODE_OPERAND_TYPE_LIST(DECLARE_BASIC_OPERAND_TYPE_TRAITS) #undef DECLARE_OPERAND_TYPE_TRAITS #define DECLARE_SPECIAL_OPERAND_TYPE_TRAITS(Name, Size) \ template <> \ struct RegExpOperandTypeTraits { \ static constexpr uint8_t kSize = Size; \ static constexpr bool kIsBasic = false; \ }; SPECIAL_BYTECODE_OPERAND_TYPE_LIST(DECLARE_SPECIAL_OPERAND_TYPE_TRAITS) #undef DECLARE_OPERAND_TYPE_TRAITS namespace detail { // Bytecode is 4-byte aligned. // We can pack operands if multiple operands fit into 4 bytes. static constexpr int kBytecodeAlignment = 4; // Calculates packed offsets for each Bytecode operand. // The first operand can be packed together with the bytecode at an unaligned // offset 1. All other operands are aligned to their own size if // they are "basic" types. template consteval auto CalculatePackedOffsets() { constexpr int N = sizeof...(operand_types); constexpr std::array kOperandSizes = { RegExpOperandTypeTraits::kSize...}; constexpr std::array kIsBasic = { RegExpOperandTypeTraits::kIsBasic...}; std::array offsets{}; int first_offset = sizeof(RegExpBytecode); int offset = first_offset; for (size_t i = 0; i < N; ++i) { uint8_t operand_size = kOperandSizes[i]; // An operand is only allowed to be unaligned, if it's packed with the // bytecode. All subsequent basic operands must be aligned to their own // size. if (offset > first_offset && kIsBasic[i]) { offset = RoundUp(offset, operand_size); } // If the operand doesn't fit into the current 4-byte block, start a new // 4-byte block. if ((offset % kBytecodeAlignment) + operand_size > kBytecodeAlignment) { offset = RoundUp(offset); } offsets[i] = offset; offset += operand_size; } return offsets; } template struct RegExpBytecodeOperandsTraits { static constexpr int kOperandCount = sizeof...(ops); static constexpr std::array kOperandTypes = {ops...}; static constexpr std::array kOperandSizes = { RegExpOperandTypeTraits::kSize...}; static constexpr std::array kOperandOffsets = CalculatePackedOffsets(); static constexpr int kSize = RoundUp( kOperandCount == 0 ? sizeof(RegExpBytecode) : kOperandOffsets.back() + kOperandSizes.back()); }; template struct RegExpBytecodeOperandNames; #define DECLARE_OPERAND_NAMES(CamelName, SnakeName, OpNames, OpTypes) \ template <> \ struct RegExpBytecodeOperandNames { \ enum class Operand { UNPAREN(OpNames) }; \ using enum Operand; \ }; REGEXP_BYTECODE_LIST(DECLARE_OPERAND_NAMES) #undef DECLARE_OPERAND_NAMES template class RegExpBytecodeOperandsBase { public: using Operand = RegExpBytecodeOperandNames::Operand; using Traits = RegExpBytecodeOperandsTraits; static constexpr int kCount = Traits::kOperandCount; static constexpr int kTotalSize = Traits::kSize; static consteval int Index(Operand op) { return static_cast(op); } static consteval int Size(Operand op) { return Traits::kOperandSizes[Index(op)]; } static consteval int Offset(Operand op) { return Traits::kOperandOffsets[Index(op)]; } static consteval RegExpBytecodeOperandType Type(Operand op) { return Traits::kOperandTypes[Index(op)]; } private: template requires(RegExpOperandTypeTraits::kIsBasic) static auto GetAligned(const uint8_t* pc, int offset) { DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc)); using CType = RegExpOperandTypeTraits::kCType; DCHECK(IsAligned(offset, sizeof(CType))); return *reinterpret_cast(pc + offset); } // TODO(pthier): We can remove unaligned packing once we have fully switched // to the new bytecode layout. This is for backwards-compatibility with the // old layout only. template requires(RegExpOperandTypeTraits::kIsBasic) static auto GetPacked(const uint8_t* pc, int offset) { DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc)); // Only unaligned packing of 2-byte values with the bytecode is supported. DCHECK_EQ(offset, 1); static_assert(RegExpOperandTypeTraits::kSize == 2); using CType = RegExpOperandTypeTraits::kCType; DCHECK(!IsAligned(offset, sizeof(CType))); int32_t packed_value = *reinterpret_cast(pc); return static_cast(packed_value >> BYTECODE_SHIFT); } public: template requires(RegExpOperandTypeTraits::kIsBasic) static auto Get(const uint8_t* pc) { constexpr RegExpBytecodeOperandType OperandType = Type(op); constexpr int offset = Offset(op); using CType = RegExpOperandTypeTraits::kCType; // TODO(pthier): We can remove unaligned packing once we have fully switched // to the new bytecode layout. This is for backwards-compatibility with the // old layout only. if constexpr (!IsAligned(offset, sizeof(CType))) { return GetPacked(pc, offset); } else { return GetAligned(pc, offset); } } template requires(Type(op) == RegExpBytecodeOperandType::kBitTable) static auto Get(const uint8_t* pc) { DCHECK_EQ(*pc, RegExpBytecodes::ToByte(bc)); constexpr int offset = Offset(op); return pc + offset; } }; } // namespace detail #define PACK_OPTIONAL(x, ...) x __VA_OPT__(, ) __VA_ARGS__ #define DECLARE_OPERANDS(CamelName, SnakeName, OpNames, OpTypes) \ template <> \ class RegExpBytecodeOperands final \ : public detail::RegExpBytecodeOperandsBase, \ public AllStatic { \ public: \ using enum Operand; \ }; REGEXP_BYTECODE_LIST(DECLARE_OPERANDS) #undef DECLARE_OPERANDS namespace detail { #define DECLARE_BYTECODE_NAMES(CamelName, ...) #CamelName, static constexpr const char* kBytecodeNames[] = { REGEXP_BYTECODE_LIST(DECLARE_BYTECODE_NAMES)}; #undef DECLARE_BYTECODE_NAMES #define DECLARE_BYTECODE_SIZES(CamelName, ...) \ RegExpBytecodeOperands::kTotalSize, static constexpr uint8_t kBytecodeSizes[] = { REGEXP_BYTECODE_LIST(DECLARE_BYTECODE_SIZES)}; #undef DECLARE_BYTECODE_SIZES } // namespace detail // static constexpr const char* RegExpBytecodes::Name(RegExpBytecode bytecode) { return Name(ToByte(bytecode)); } // static constexpr const char* RegExpBytecodes::Name(uint8_t bytecode) { DCHECK_LT(bytecode, kCount); return detail::kBytecodeNames[bytecode]; } // static constexpr uint8_t RegExpBytecodes::Size(RegExpBytecode bytecode) { return Size(ToByte(bytecode)); } // static constexpr uint8_t RegExpBytecodes::Size(uint8_t bytecode) { DCHECK_LT(bytecode, kCount); return detail::kBytecodeSizes[bytecode]; } // Checks for backwards compatibility. // TODO(pthier): Remove once we removed the old bytecode format. static_assert(kRegExpBytecodeCount == RegExpBytecodes::kCount); #define CHECK_BYTECODE_VALUE(CamelName, SnakeName, ...) \ static_assert(RegExpBytecodes::ToByte(RegExpBytecode::k##CamelName) == \ BC_##SnakeName); REGEXP_BYTECODE_LIST(CHECK_BYTECODE_VALUE) #undef CHECK_BYTECODE_VALUE #define CHECK_LENGTH(CamelName, SnakeName, ...) \ static_assert(RegExpBytecodes::Size(RegExpBytecode::k##CamelName) == \ RegExpBytecodeLength(BC_##SnakeName)); REGEXP_BYTECODE_LIST(CHECK_LENGTH) #undef CHECK_LENGTH } // namespace internal } // namespace v8 #endif // V8_REGEXP_REGEXP_BYTECODES_INL_H_