Files
Library/node_modules/oniguruma-to-es/dist/esm/index.js

1877 lines
59 KiB
JavaScript
Raw Normal View History

2026-01-09 23:05:52 -05:00
// src/utils.js
// Short aliases for two `String` statics used throughout this file:
// `cp` builds a string from code points, `r` is the raw-template tag.
var { fromCodePoint: cp, raw: r } = String;
// Feature detection for the current JavaScript engine. Each entry is `true`
// when the corresponding pattern can be constructed without throwing.
var envFlags = (() => {
  // Returns whether `new RegExp(pattern, flags)` succeeds.
  const compiles = (pattern, flags) => {
    try {
      new RegExp(pattern, flags);
      return true;
    } catch {
      return false;
    }
  };
  return {
    // Inline flag groups such as `(?i:)`
    flagGroups: compiles("(?i:)"),
    // Flag `v` with a nested character class
    unicodeSets: compiles("[[]]", "v")
  };
})();
// Engine bug probes; both are only meaningful (and only run) when flag `v` is supported.
// `bugFlagVLiteralHyphenIsRange` becomes `true` when constructing a `v`-flag class that
// contains an escaped hyphen after `\d` throws.
envFlags.bugFlagVLiteralHyphenIsRange = false;
if (envFlags.unicodeSets) {
  try {
    new RegExp(r`[\d\-a]`, "v");
  } catch {
    envFlags.bugFlagVLiteralHyphenIsRange = true;
  }
}
// `bugNestedClassIgnoresNegation` is truthy when a negated class nested inside another
// class still matches the character it excludes.
envFlags.bugNestedClassIgnoresNegation = envFlags.unicodeSets && new RegExp("[[^a]]", "v").test("a");
/**
 * Applies flag modifiers on top of the currently active flags.
 * A flag listed in `disable` is always off; otherwise it is on when it is
 * listed in `enable` or already on in `current`.
 * @param {{dotAll: boolean, ignoreCase: boolean}} current
 * @param {{enable?: object, disable?: object}} mods
 * @returns {{dotAll: boolean, ignoreCase: boolean}}
 */
function getNewCurrentFlags(current, { enable, disable }) {
  const resolve = (flag) => {
    if (disable?.[flag]) {
      return false;
    }
    return Boolean(enable?.[flag] || current[flag]);
  };
  return {
    dotAll: resolve("dotAll"),
    ignoreCase: resolve("ignoreCase")
  };
}
/**
 * Returns the value stored under `key`, first storing `defaultValue` when the
 * key is not present yet.
 */
function getOrInsert(map, key, defaultValue) {
  if (map.has(key)) {
    return map.get(key);
  }
  map.set(key, defaultValue);
  return map.get(key);
}
/**
 * Whether `target` is at least as recent as `min`, comparing the numeric
 * values both names map to in `EsVersion`.
 */
function isMinTarget(target, min) {
  const targetYear = EsVersion[target];
  const minYear = EsVersion[min];
  return targetYear >= minYear;
}
/**
 * Returns `value` unchanged unless it is `null` or `undefined`, in which case
 * an `Error` is thrown with `msg` (or a generic message when `msg` is nullish).
 */
function throwIfNullish(value, msg) {
  if (value !== null && value !== undefined) {
    return value;
  }
  throw new Error(msg ?? "Value expected");
}
// src/options.js
// Maps each supported target name (`ES<year>`) to its numeric year so targets can be compared.
var EsVersion = Object.fromEntries(
  [2025, 2024, 2018].map((year) => [`ES${year}`, year])
);
// Accepted values for the `target` option; every key maps to itself.
var Target = Object.fromEntries(
  ["auto", "ES2025", "ES2024", "ES2018"].map((name) => [name, name])
);
/**
 * Validates user options and merges them over the defaults.
 *
 * @param {object} [options] Plain options object. Top-level keys override the defaults and
 *   `options.rules` keys override the default rules.
 * @returns {object} The merged options. A `target` of `"auto"` is resolved to a concrete
 *   `ES20xx` value based on `envFlags`.
 * @throws {Error} If `options` is not a plain object, or if `options.target` is given but is
 *   not one of the own keys of `Target`.
 */
function getOptions(options = {}) {
  if ({}.toString.call(options) !== "[object Object]") {
    throw new Error("Unexpected options");
  }
  // Check own keys only so that inherited names such as "toString" or "constructor" are rejected
  if (options.target !== void 0 && !Object.hasOwn(Target, options.target)) {
    throw new Error(`Unexpected target "${options.target}"`);
  }
  const opts = {
    // Sets the level of emulation rigor/strictness.
    accuracy: "default",
    // Disables advanced emulation that relies on returning a `RegExp` subclass, resulting in
    // certain patterns not being emulatable.
    avoidSubclass: false,
    // Oniguruma flags; a string with `i`, `m`, `x`, `D`, `S`, `W`, `y{g}` in any order (all
    // optional). Oniguruma's `m` is equivalent to JavaScript's `s` (`dotAll`).
    flags: "",
    // Include JavaScript flag `g` (`global`) in the result.
    global: false,
    // Include JavaScript flag `d` (`hasIndices`) in the result.
    hasIndices: false,
    // Delay regex construction until first use if the transpiled pattern is at least this length.
    lazyCompileLength: Infinity,
    // JavaScript version used for generated regexes. Using `auto` detects the best value based on
    // your environment. Later targets allow faster processing, simpler generated source, and
    // support for additional features.
    target: "auto",
    // Disables minifications that simplify the pattern without changing the meaning.
    verbose: false,
    ...options,
    // Advanced options that override standard behavior, error checking, and flags when enabled.
    rules: {
      // Useful with TextMate grammars that merge backreferences across patterns.
      allowOrphanBackrefs: false,
      // Use ASCII `\b` and `\B`, which increases search performance of generated regexes.
      asciiWordBoundaries: false,
      // Allow unnamed captures and numbered calls (backreferences and subroutines) when using
      // named capture. This is Oniguruma option `ONIG_OPTION_CAPTURE_GROUP`; on by default in
      // `vscode-oniguruma`.
      captureGroup: false,
      // Change the recursion depth limit from Oniguruma's `20` to an integer from `2` to `20`.
      recursionLimit: 20,
      // `^` as `\A`; `$` as `\Z`. Improves search performance of generated regexes without
      // changing the meaning if searching line by line. This is Oniguruma option
      // `ONIG_OPTION_SINGLELINE`.
      singleline: false,
      ...options.rules
    }
  };
  if (opts.target === "auto") {
    // Pick the newest target whose required syntax the current engine supports
    opts.target = envFlags.flagGroups ? "ES2025" : envFlags.unicodeSets ? "ES2024" : "ES2018";
  }
  return opts;
}
// src/unicode.js
import { slug } from "oniguruma-parser/parser";
// Character class source for ASCII whitespace: the range tab (U+0009) through carriage return
// (U+000D), plus space. The tab is written as an escape so the range start cannot be lost or
// turned into a space by editors and tooling (which would make the range start after its end).
var asciiSpaceChar = "[\t-\r ]";
// Characters that `getIgnoreCaseMatchChars` returns as-is, without adding case variants:
// U+0130 (capital I with dot above) and U+0131 (dotless small i).
var CharsWithoutIgnoreCaseExpansion = /* @__PURE__ */ new Set(
  [304, 305].map((codePoint) => cp(codePoint))
);
// Character class source used as the word character definition (letters, marks, numbers and
// connector punctuation).
var defaultWordChar = r`[\p{L}\p{M}\p{N}\p{Pc}]`;
/**
 * Lists the characters that should be treated as case variants of `char`.
 * The result contains no duplicates and is ordered: uppercase (only when it is a single code
 * point), alternative uppercase, titlecase, lowercase, alternative lowercase.
 * Characters in `CharsWithoutIgnoreCaseExpansion` are returned alone.
 * @param {string} char
 * @returns {Array<string>}
 */
function getIgnoreCaseMatchChars(char) {
  if (CharsWithoutIgnoreCaseExpansion.has(char)) {
    return [char];
  }
  const lower = char.toLowerCase();
  const upper = lower.toUpperCase();
  const matches = /* @__PURE__ */ new Set();
  const addIfPresent = (candidate) => {
    if (candidate) {
      matches.add(candidate);
    }
  };
  // Skip uppercase forms that expand to more than one code point
  if ([...upper].length === 1) {
    matches.add(upper);
  }
  addIfPresent(LowerToAlternativeUpperCaseMap.get(lower));
  addIfPresent(LowerToTitleCaseMap.get(lower));
  matches.add(lower);
  addIfPresent(LowerToAlternativeLowerCaseMap.get(lower));
  return Array.from(matches);
}
// Lookup of Unicode property names. The template literal below is split on every whitespace
// character, so each token (a name or one of the aliases listed after it on the same line)
// becomes its own entry, keyed by `slug(token)` with the token itself as the value.
// NOTE: whitespace inside the literal is significant; do not indent or reflow it.
var JsUnicodePropertyMap = /* @__PURE__ */ new Map(
`C Other
Cc Control cntrl
Cf Format
Cn Unassigned
Co Private_Use
Cs Surrogate
L Letter
LC Cased_Letter
Ll Lowercase_Letter
Lm Modifier_Letter
Lo Other_Letter
Lt Titlecase_Letter
Lu Uppercase_Letter
M Mark Combining_Mark
Mc Spacing_Mark
Me Enclosing_Mark
Mn Nonspacing_Mark
N Number
Nd Decimal_Number digit
Nl Letter_Number
No Other_Number
P Punctuation punct
Pc Connector_Punctuation
Pd Dash_Punctuation
Pe Close_Punctuation
Pf Final_Punctuation
Pi Initial_Punctuation
Po Other_Punctuation
Ps Open_Punctuation
S Symbol
Sc Currency_Symbol
Sk Modifier_Symbol
Sm Math_Symbol
So Other_Symbol
Z Separator
Zl Line_Separator
Zp Paragraph_Separator
Zs Space_Separator
ASCII
ASCII_Hex_Digit AHex
Alphabetic Alpha
Any
Assigned
Bidi_Control Bidi_C
Bidi_Mirrored Bidi_M
Case_Ignorable CI
Cased
Changes_When_Casefolded CWCF
Changes_When_Casemapped CWCM
Changes_When_Lowercased CWL
Changes_When_NFKC_Casefolded CWKCF
Changes_When_Titlecased CWT
Changes_When_Uppercased CWU
Dash
Default_Ignorable_Code_Point DI
Deprecated Dep
Diacritic Dia
Emoji
Emoji_Component EComp
Emoji_Modifier EMod
Emoji_Modifier_Base EBase
Emoji_Presentation EPres
Extended_Pictographic ExtPict
Extender Ext
Grapheme_Base Gr_Base
Grapheme_Extend Gr_Ext
Hex_Digit Hex
IDS_Binary_Operator IDSB
IDS_Trinary_Operator IDST
ID_Continue IDC
ID_Start IDS
Ideographic Ideo
Join_Control Join_C
Logical_Order_Exception LOE
Lowercase Lower
Math
Noncharacter_Code_Point NChar
Pattern_Syntax Pat_Syn
Pattern_White_Space Pat_WS
Quotation_Mark QMark
Radical
Regional_Indicator RI
Sentence_Terminal STerm
Soft_Dotted SD
Terminal_Punctuation Term
Unified_Ideograph UIdeo
Uppercase Upper
Variation_Selector VS
White_Space space
XID_Continue XIDC
XID_Start XIDS`.split(/\s/).map((p) => [slug(p), p])
);
// Lowercase characters that have a second lowercase form: `s` and U+017F (long s) map to
// each other.
var LowerToAlternativeLowerCaseMap = /* @__PURE__ */ (() => {
  const longS = cp(383);
  return new Map([
    ["s", longS],
    [longS, "s"]
  ]);
})();
// Lowercase characters with an extra uppercase form in addition to the one produced by
// `toUpperCase()`. Pairs are [lowercase code point, alternative uppercase code point].
var LowerToAlternativeUpperCaseMap = /* @__PURE__ */ new Map(
  [
    // U+00DF (sharp s) -> U+1E9E (capital sharp s)
    [223, 7838],
    // U+006B (k) -> U+212A (Kelvin sign)
    [107, 8490],
    // U+00E5 (a with ring) -> U+212B (Angstrom sign)
    [229, 8491],
    // U+03C9 (omega) -> U+2126 (Ohm sign)
    [969, 8486]
  ].map(([lowerCodePoint, upperCodePoint]) => [cp(lowerCodePoint), cp(upperCodePoint)])
);
// Maps a lowercase character to its titlecase form, built from the code points of the
// titlecase characters themselves (single code points and inclusive ranges).
var LowerToTitleCaseMap = new Map([
  ...[453, 456, 459, 498].map((codePoint) => titleEntry(codePoint)),
  ...titleRange(8072, 8079),
  ...titleRange(8088, 8095),
  ...titleRange(8104, 8111),
  ...[8124, 8140, 8188].map((codePoint) => titleEntry(codePoint))
]);
// Pattern source used to represent each POSIX class name, keyed by the class name.
var PosixClassMap = /* @__PURE__ */ new Map(Object.entries({
  alnum: r`[\p{Alpha}\p{Nd}]`,
  alpha: r`\p{Alpha}`,
  ascii: r`\p{ASCII}`,
  blank: r`[\p{Zs}\t]`,
  cntrl: r`\p{Cc}`,
  digit: r`\p{Nd}`,
  graph: r`[\P{space}&&\P{Cc}&&\P{Cn}&&\P{Cs}]`,
  lower: r`\p{Lower}`,
  print: r`[[\P{space}&&\P{Cc}&&\P{Cn}&&\P{Cs}]\p{Zs}]`,
  // Updated value from Onig 6.9.9; changed from Unicode `\p{punct}`
  punct: r`[\p{P}\p{S}]`,
  space: r`\p{space}`,
  upper: r`\p{Upper}`,
  word: r`[\p{Alpha}\p{M}\p{Nd}\p{Pc}]`,
  xdigit: r`\p{AHex}`
}));
/**
 * Builds the list `start, start + 1, ...` up to and including `end`.
 * Returns an empty array when `start` is greater than `end`.
 */
function range(start, end) {
  const values = [];
  let next = start;
  while (next <= end) {
    values.push(next);
    next++;
  }
  return values;
}
/**
 * Builds a `[lowercase, titlecase]` pair for the titlecase character at `codePoint`.
 */
function titleEntry(codePoint) {
  const titleChar = cp(codePoint);
  const lowerChar = titleChar.toLowerCase();
  return [lowerChar, titleChar];
}
/**
 * Builds one `titleEntry` pair for every code point from `start` through `end` (inclusive).
 */
function titleRange(start, end) {
  const entries = [];
  for (const codePoint of range(start, end)) {
    entries.push(titleEntry(codePoint));
  }
  return entries;
}
// Unicode property names (short and long forms) that select characters of one specific case.
// The `Changes_When_*` properties (and their aliases) could be included, but they're very rare.
// Some other properties include a handful of chars with specific cases only, but these chars are
// generally extreme edge cases and using such properties case insensitively generally produces
// undesired behavior anyway
var UnicodePropertiesWithSpecificCase = /* @__PURE__ */ new Set([
  ["Lower", "Lowercase"],
  ["Upper", "Uppercase"],
  ["Ll", "Lowercase_Letter"],
  ["Lt", "Titlecase_Letter"],
  ["Lu", "Uppercase_Letter"]
].flat());
// src/transform.js
import { createAlternative, createAssertion, createBackreference, createCapturingGroup, createCharacter, createCharacterClass, createCharacterSet, createGroup, createLookaroundAssertion, createQuantifier, createSubroutine, createUnicodeProperty, hasOnlyChild, parse, slug as slug2 } from "oniguruma-parser/parser";
import { traverse } from "oniguruma-parser/traverser";
/**
 * Transforms the given AST in place by running three traversals over it (`FirstPassVisitor`,
 * `SecondPassVisitor`, `ThirdPassVisitor`), each with its own state object.
 *
 * @param {object} ast Root node; must have a `flags` object. Mutated and returned.
 * @param {object} [options] Overrides for `accuracy`, `asciiWordBoundaries`, `avoidSubclass`
 *   and `bestEffortTarget`.
 * @returns {object} The same `ast`, with `_originMap` and `_strategy` set from the pass states.
 */
function transform(ast, options) {
  // A couple edge cases exist where options `accuracy` and `bestEffortTarget` are used:
  // - `CharacterSet` kind `text_segment` (`\X`): An exact representation would require heavy
  //   Unicode data; a best-effort approximation requires knowing the target.
  // - `CharacterSet` kind `posix` with values `graph` and `print`: Their complex Unicode
  //   representations would be hard to change to ASCII versions after the fact in the generator
  //   based on `target`/`accuracy`, so produce the appropriate structure here.
  const settings = {
    accuracy: "default",
    asciiWordBoundaries: false,
    avoidSubclass: false,
    bestEffortTarget: "ES2025",
    ...options
  };
  const { accuracy, asciiWordBoundaries, avoidSubclass, bestEffortTarget } = settings;
  addParentProperties(ast);

  // First pass
  const { digitIsAscii, spaceIsAscii, wordIsAscii } = ast.flags;
  // Subroutines can appear before the groups they ref, so collect reffed nodes for a second pass
  const subroutineRefMap = /* @__PURE__ */ new Map();
  const firstPassState = {
    accuracy,
    asciiWordBoundaries,
    avoidSubclass,
    flagDirectivesByAlt: /* @__PURE__ */ new Map(),
    jsGroupNameMap: /* @__PURE__ */ new Map(),
    minTargetEs2024: isMinTarget(bestEffortTarget, "ES2024"),
    passedLookbehind: false,
    strategy: null,
    subroutineRefMap,
    supportedGNodes: /* @__PURE__ */ new Set(),
    digitIsAscii,
    spaceIsAscii,
    wordIsAscii
  };
  traverse(ast, FirstPassVisitor, firstPassState);

  // Second pass; the global flags are read after the first pass has run
  const { dotAll, ignoreCase } = ast.flags;
  const globalFlags = { dotAll, ignoreCase };
  const groupOriginByCopy = /* @__PURE__ */ new Map();
  const groupsByName = /* @__PURE__ */ new Map();
  const reffedNodesByReferencer = /* @__PURE__ */ new Map();
  const secondPassState = {
    currentFlags: globalFlags,
    prevFlags: null,
    globalFlags,
    groupOriginByCopy,
    groupsByName,
    multiplexCapturesToLeftByRef: /* @__PURE__ */ new Map(),
    openRefs: /* @__PURE__ */ new Map(),
    reffedNodesByReferencer,
    subroutineRefMap: firstPassState.subroutineRefMap
  };
  traverse(ast, SecondPassVisitor, secondPassState);

  // Third pass; shares the group and reference maps filled by the second pass
  const thirdPassState = {
    groupsByName: secondPassState.groupsByName,
    highestOrphanBackref: 0,
    numCapturesToLeft: 0,
    reffedNodesByReferencer: secondPassState.reffedNodesByReferencer
  };
  traverse(ast, ThirdPassVisitor, thirdPassState);

  ast._originMap = secondPassState.groupOriginByCopy;
  ast._strategy = firstPassState.strategy;
  return ast;
}
// Visitor for the first traversal run by `transform`. Handlers are keyed by node type and
// receive a path object (`node`, `parent`, `replaceWith`, `remove`, ...) plus the shared
// first-pass state. They replace, remove, or mutate nodes in place, throw for constructs
// that are not supported, and record data on the state for use after the pass.
var FirstPassVisitor = {
// Rewrites a `repeater` absence function as a group holding a greedy `{0,Infinity}`
// quantifier over (negated lookaround containing the original body, followed by `\p{Any}`).
// Any other kind throws.
AbsenceFunction({ node, parent, replaceWith }) {
const { body, kind } = node;
if (kind === "repeater") {
const innerGroup = createGroup();
innerGroup.body[0].body.push(
// Insert own alts as `body`
createLookaroundAssertion({ negate: true, body }),
createUnicodeProperty("Any")
);
const outerGroup = createGroup();
outerGroup.body[0].body.push(
createQuantifier("greedy", 0, Infinity, innerGroup)
);
replaceWith(setParentDeep(outerGroup, parent), { traverse: true });
} else {
throw new Error(`Unsupported absence function "(?~|"`);
}
},
Alternative: {
// Registers the `flags` elements found in this alternative for every alternative that
// follows it in the same parent.
enter({ node, parent, key }, { flagDirectivesByAlt }) {
const flagDirectives = node.body.filter((el) => el.kind === "flags");
for (let i = key + 1; i < parent.body.length; i++) {
const forwardSiblingAlt = parent.body[i];
getOrInsert(flagDirectivesByAlt, forwardSiblingAlt, []).push(...flagDirectives);
}
},
// If flag elements were registered for this alternative and they combine to a non-null
// set of modifiers, wraps the alternative's whole body in a single flag group.
exit({ node }, { flagDirectivesByAlt }) {
if (flagDirectivesByAlt.get(node)?.length) {
const flags = getCombinedFlagModsFromFlagNodes(flagDirectivesByAlt.get(node));
if (flags) {
const flagGroup = createGroup({ flags });
flagGroup.body[0].body = node.body;
node.body = [setParentDeep(flagGroup, node)];
}
}
}
},
// Rewrites assertions by kind; throws for `text_segment_boundary` and for unknown kinds.
Assertion({ node, parent, key, container, root, remove, replaceWith }, state) {
const { kind, negate } = node;
const { asciiWordBoundaries, avoidSubclass, supportedGNodes, wordIsAscii } = state;
if (kind === "text_segment_boundary") {
throw new Error(`Unsupported text segment boundary "\\${negate ? "Y" : "y"}"`);
} else if (kind === "line_end") {
// Replaced by a lookaround with two alternatives: string end, or a line feed
replaceWith(setParentDeep(createLookaroundAssertion({ body: [
createAlternative({ body: [createAssertion("string_end")] }),
createAlternative({ body: [createCharacter(10)] })
// `\n`
] }), parent));
} else if (kind === "line_start") {
replaceWith(setParentDeep(parseFragment(r`(?<=\A|\n(?!\z))`, { skipLookbehindValidation: true }), parent));
} else if (kind === "search_start") {
if (supportedGNodes.has(node)) {
// Nodes collected by `Regex.enter` are expressed through the sticky flag instead
root.flags.sticky = true;
remove();
} else {
const prev = container[key - 1];
if (prev && isAlwaysNonZeroLength(prev)) {
// Preceded by an element that always consumes input: replaced by an empty negated
// lookaround
replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
} else if (avoidSubclass) {
throw new Error(r`Uses "\G" in a way that requires a subclass`);
} else {
replaceWith(setParent(createAssertion("string_start"), parent));
state.strategy = "clip_search";
}
}
} else if (kind === "string_end" || kind === "string_start") {
// Left unchanged
} else if (kind === "string_end_newline") {
replaceWith(setParentDeep(parseFragment(r`(?=\n?\z)`), parent));
} else if (kind === "word_boundary") {
// Only rewritten when neither `wordIsAscii` nor `asciiWordBoundaries` is set; the
// replacement spells the boundary out with lookarounds over `defaultWordChar`
if (!wordIsAscii && !asciiWordBoundaries) {
const b = `(?:(?<=${defaultWordChar})(?!${defaultWordChar})|(?<!${defaultWordChar})(?=${defaultWordChar}))`;
const B = `(?:(?<=${defaultWordChar})(?=${defaultWordChar})|(?<!${defaultWordChar})(?!${defaultWordChar}))`;
replaceWith(setParentDeep(parseFragment(negate ? B : b), parent));
}
} else {
throw new Error(`Unexpected assertion kind "${kind}"`);
}
},
// Renames string refs that are not valid JS group names, using `jsGroupNameMap` so the same
// original name always maps to the same replacement.
Backreference({ node }, { jsGroupNameMap }) {
let { ref } = node;
if (typeof ref === "string" && !isValidJsGroupName(ref)) {
ref = getAndStoreJsGroupName(ref, jsGroupNameMap);
node.ref = ref;
}
},
// Renames invalid JS group names, then registers the group in `subroutineRefMap` under its
// number and (if named) its name.
CapturingGroup({ node }, { jsGroupNameMap, subroutineRefMap }) {
let { name } = node;
if (name && !isValidJsGroupName(name)) {
name = getAndStoreJsGroupName(name, jsGroupNameMap);
node.name = name;
}
subroutineRefMap.set(node.number, node);
if (name) {
subroutineRefMap.set(name, node);
}
},
// Wraps a range whose parent has kind `intersection` in its own character class.
CharacterClassRange({ node, parent, replaceWith }) {
if (parent.kind === "intersection") {
const cc = createCharacterClass({ body: [node] });
replaceWith(setParentDeep(cc, parent), { traverse: true });
}
},
// Rewrites character sets by kind. The three `*IsAscii` checks run first and return early;
// unknown kinds throw.
CharacterSet({ node, parent, replaceWith }, { accuracy, minTargetEs2024, digitIsAscii, spaceIsAscii, wordIsAscii }) {
const { kind, negate, value } = node;
if (digitIsAscii && (kind === "digit" || value === "digit")) {
replaceWith(setParent(createCharacterSet("digit", { negate }), parent));
return;
}
if (spaceIsAscii && (kind === "space" || value === "space")) {
replaceWith(setParentDeep(setNegate(parseFragment(asciiSpaceChar), negate), parent));
return;
}
if (wordIsAscii && (kind === "word" || value === "word")) {
replaceWith(setParent(createCharacterSet("word", { negate }), parent));
return;
}
if (kind === "any") {
replaceWith(setParent(createUnicodeProperty("Any"), parent));
} else if (kind === "digit") {
replaceWith(setParent(createUnicodeProperty("Nd", { negate }), parent));
} else if (kind === "dot") {
// Left unchanged
} else if (kind === "text_segment") {
if (accuracy === "strict") {
throw new Error(r`Use of "\X" requires non-strict accuracy`);
}
const eBase = "\\p{Emoji}(?:\\p{EMod}|\\uFE0F\\u20E3?|[\\x{E0020}-\\x{E007E}]+\\x{E007F})?";
const emoji = r`\p{RI}{2}|${eBase}(?:\u200D${eBase})*`;
replaceWith(setParentDeep(parseFragment(
// Close approximation of an extended grapheme cluster; see <unicode.org/reports/tr29/>
r`(?>\r\n|${minTargetEs2024 ? r`\p{RGI_Emoji}` : emoji}|\P{M}\p{M}*)`,
// Allow JS property `RGI_Emoji` through
{ skipPropertyNameValidation: true }
), parent));
} else if (kind === "hex") {
replaceWith(setParent(createUnicodeProperty("AHex", { negate }), parent));
} else if (kind === "newline") {
replaceWith(setParentDeep(parseFragment(negate ? "[^\n]" : "(?>\r\n?|[\n\v\f\x85\u2028\u2029])"), parent));
} else if (kind === "posix") {
if (!minTargetEs2024 && (value === "graph" || value === "print")) {
if (accuracy === "strict") {
throw new Error(`POSIX class "${value}" requires min target ES2024 or non-strict accuracy`);
}
// ASCII-only stand-ins, written as `<first>-<last>` ranges
let ascii = {
graph: "!-~",
print: " -~"
}[value];
if (negate) {
// Complement of the range: everything below its first char and above its last char
ascii = `\0-${cp(ascii.codePointAt(0) - 1)}${cp(ascii.codePointAt(2) + 1)}-\u{10FFFF}`;
}
replaceWith(setParentDeep(parseFragment(`[${ascii}]`), parent));
} else {
replaceWith(setParentDeep(setNegate(parseFragment(PosixClassMap.get(value)), negate), parent));
}
} else if (kind === "property") {
// Values not found in `JsUnicodePropertyMap` get the key `sc`
if (!JsUnicodePropertyMap.has(slug2(value))) {
node.key = "sc";
}
} else if (kind === "space") {
replaceWith(setParent(createUnicodeProperty("space", { negate }), parent));
} else if (kind === "word") {
replaceWith(setParentDeep(setNegate(parseFragment(defaultWordChar), negate), parent));
} else {
throw new Error(`Unexpected character set kind "${kind}"`);
}
},
// `flags` directives become a flag group that takes over all following siblings; `keep`
// becomes a lookbehind that takes over all preceding siblings. Unknown kinds throw.
Directive({ node, parent, root, remove, replaceWith, removeAllPrevSiblings, removeAllNextSiblings }) {
const { kind, flags } = node;
if (kind === "flags") {
if (!flags.enable && !flags.disable) {
remove();
} else {
const flagGroup = createGroup({ flags });
flagGroup.body[0].body = removeAllNextSiblings();
replaceWith(setParentDeep(flagGroup, parent), { traverse: true });
}
} else if (kind === "keep") {
const firstAlt = root.body[0];
const hasWrapperGroup = root.body.length === 1 && // Not emulatable if within a `CapturingGroup`
hasOnlyChild(firstAlt, { type: "Group" }) && firstAlt.body[0].body.length === 1;
const topLevel = hasWrapperGroup ? firstAlt.body[0] : root;
// Only accepted directly inside the single alternative of the top level (or of the
// lone wrapper group)
if (parent.parent !== topLevel || topLevel.body.length > 1) {
throw new Error(r`Uses "\K" in a way that's unsupported`);
}
const lookbehind = createLookaroundAssertion({ behind: true });
lookbehind.body[0].body = removeAllPrevSiblings();
replaceWith(setParentDeep(lookbehind, parent));
} else {
throw new Error(`Unexpected directive kind "${kind}"`);
}
},
// Rejects unsupported flags, deletes the Oniguruma-only flag properties, adds the JS-only
// ones, and sets `options` on the parent node.
Flags({ node, parent }) {
if (node.posixIsAscii) {
throw new Error('Unsupported flag "P"');
}
if (node.textSegmentMode === "word") {
throw new Error('Unsupported flag "y{w}"');
}
[
"digitIsAscii",
// Flag D
"extended",
// Flag x
"posixIsAscii",
// Flag P
"spaceIsAscii",
// Flag S
"wordIsAscii",
// Flag W
"textSegmentMode"
// Flag y{g} or y{w}
].forEach((f) => delete node[f]);
Object.assign(node, {
// JS flag g; no Onig equiv
global: false,
// JS flag d; no Onig equiv
hasIndices: false,
// JS flag m; no Onig equiv but its behavior is always on in Onig. Onig's only line break
// char is line feed, unlike JS, so this flag isn't used since it would produce inaccurate
// results (also allows `^` and `$` to be used in the generator for string start and end)
multiline: false,
// JS flag y; no Onig equiv, but used for `\G` emulation
sticky: node.sticky ?? false
// Note: Regex+ doesn't allow explicitly adding flags it handles implicitly, so leave out
// properties `unicode` (JS flag u) and `unicodeSets` (JS flag v). Keep the existing values
// for `ignoreCase` (flag i) and `dotAll` (JS flag s, but Onig flag m)
});
parent.options = {
disable: {
// Onig uses different rules for flag x than Regex+, so disable the implicit flag
x: true,
// Onig has no flag to control "named capture only" mode but contextually applies its
// behavior when named capturing is used, so disable Regex+'s implicit flag for it
n: true
},
force: {
// Always add flag v because we're generating an AST that relies on it (it enables JS
// support for Onig features nested classes, intersection, Unicode properties, etc.).
// However, the generator might disable flag v based on its `target` option
v: true
}
};
},
// Normalizes a group's flag modifiers: drops `extended`, lets `disable` win when a flag is
// both enabled and disabled, and removes `enable`/`disable`/`flags` once they are empty.
Group({ node }) {
if (!node.flags) {
return;
}
const { enable, disable } = node.flags;
enable?.extended && delete enable.extended;
disable?.extended && delete disable.extended;
enable?.dotAll && disable?.dotAll && delete enable.dotAll;
enable?.ignoreCase && disable?.ignoreCase && delete enable.ignoreCase;
enable && !Object.keys(enable).length && delete node.flags.enable;
disable && !Object.keys(disable).length && delete node.flags.disable;
!node.flags.enable && !node.flags.disable && delete node.flags;
},
// Records on the state that a lookbehind was encountered (checked in `Regex.exit`).
LookaroundAssertion({ node }, state) {
const { kind } = node;
if (kind === "lookbehind") {
state.passedLookbehind = true;
}
},
// `fail` becomes an empty negated lookaround; every other named callout throws.
NamedCallout({ node, parent, replaceWith }) {
const { kind } = node;
if (kind === "fail") {
replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
} else {
throw new Error(`Unsupported named callout "(*${kind.toUpperCase()}"`);
}
},
// Wraps a directly nested quantifier in a group so the outer quantifier applies to a group.
Quantifier({ node }) {
if (node.body.type === "Quantifier") {
const group = createGroup();
group.body[0].body.push(node.body);
node.body = setParentDeep(group, node);
}
},
Regex: {
// Collects the leading `search_start` nodes of the top-level alternatives. They are added
// to `supportedGNodes` only when every remaining alternative has one. An alternative that
// consists of nothing but `search_start` is emptied and not counted either way.
enter({ node }, { supportedGNodes }) {
const leadingGs = [];
let hasAltWithLeadG = false;
let hasAltWithoutLeadG = false;
for (const alt of node.body) {
if (alt.body.length === 1 && alt.body[0].kind === "search_start") {
alt.body.pop();
} else {
const leadingG = getLeadingG(alt.body);
if (leadingG) {
hasAltWithLeadG = true;
Array.isArray(leadingG) ? leadingGs.push(...leadingG) : leadingGs.push(leadingG);
} else {
hasAltWithoutLeadG = true;
}
}
}
if (hasAltWithLeadG && !hasAltWithoutLeadG) {
leadingGs.forEach((g) => supportedGNodes.add(g));
}
},
// In strict accuracy, throws when a strategy was set and a lookbehind was seen.
exit(_, { accuracy, passedLookbehind, strategy }) {
if (accuracy === "strict" && passedLookbehind && strategy) {
throw new Error(r`Uses "\G" in a way that requires non-strict accuracy`);
}
}
},
// Same renaming as `Backreference`, applied to subroutine refs.
Subroutine({ node }, { jsGroupNameMap }) {
let { ref } = node;
if (typeof ref === "string" && !isValidJsGroupName(ref)) {
ref = getAndStoreJsGroupName(ref, jsGroupNameMap);
node.ref = ref;
}
}
};
// Visitor for the second traversal run by `transform`. It expands subroutines into copies of
// the groups they reference, tracks which capturing groups each backreference can refer to,
// and tracks the flags in effect at the current position.
var SecondPassVisitor = {
  // For non-orphan backreferences, snapshots the captures currently registered under the ref.
  Backreference({ node }, { multiplexCapturesToLeftByRef, reffedNodesByReferencer }) {
    const { orphan, ref } = node;
    if (!orphan) {
      reffedNodesByReferencer.set(node, multiplexCapturesToLeftByRef.get(ref).map(({ node: node2 }) => node2));
    }
  },
  CapturingGroup: {
    enter({
      node,
      parent,
      replaceWith,
      skip
    }, {
      groupOriginByCopy,
      groupsByName,
      multiplexCapturesToLeftByRef,
      openRefs,
      reffedNodesByReferencer
    }) {
      // `origin` is set only for groups created by subroutine expansion
      const origin = groupOriginByCopy.get(node);
      if (origin && openRefs.has(node.number)) {
        // A copy of a group that is still open: replace it with a recursion marker that
        // points at the open group instead of expanding it again
        const recursion2 = setParent(createRecursion(node.number), parent);
        reffedNodesByReferencer.set(recursion2, openRefs.get(node.number));
        replaceWith(recursion2);
        return;
      }
      openRefs.set(node.number, node);
      multiplexCapturesToLeftByRef.set(node.number, []);
      if (node.name) {
        getOrInsert(multiplexCapturesToLeftByRef, node.name, []);
      }
      const multiplexNodes = multiplexCapturesToLeftByRef.get(node.name ?? node.number);
      for (let i = 0; i < multiplexNodes.length; i++) {
        const multiplex = multiplexNodes[i];
        if (
          // This group is from subroutine expansion, and there's a multiplex value from either the
          // origin node or a prior subroutine expansion group with the same origin
          origin === multiplex.node || origin && origin === multiplex.origin || // This group is not from subroutine expansion, and it comes after a subroutine expansion
          // group that refers to this group
          node === multiplex.origin
        ) {
          multiplexNodes.splice(i, 1);
          break;
        }
      }
      multiplexCapturesToLeftByRef.get(node.number).push({ node, origin });
      if (node.name) {
        multiplexCapturesToLeftByRef.get(node.name).push({ node, origin });
        const groupsWithSameName = getOrInsert(groupsByName, node.name, /* @__PURE__ */ new Map());
        // The name is marked for removal on subroutine copies, and on any group that follows
        // a same-named group which is not itself marked for removal
        let hasDuplicateNameToRemove = false;
        if (origin) {
          hasDuplicateNameToRemove = true;
        } else {
          for (const groupInfo of groupsWithSameName.values()) {
            if (!groupInfo.hasDuplicateNameToRemove) {
              hasDuplicateNameToRemove = true;
              break;
            }
          }
        }
        groupsByName.get(node.name).set(node, { node, hasDuplicateNameToRemove });
      }
    },
    exit({ node }, { openRefs }) {
      openRefs.delete(node.number);
    }
  },
  Group: {
    // Saves the flags in effect before the group on a stack, then applies the group's own
    // flag modifiers. A stack (rather than one saved slot) is needed because groups nest:
    // with a single slot, an inner group overwrites the value its enclosing group saved, and
    // the enclosing group's flags would stay active after it closes.
    enter({ node }, state) {
      const flagsStack = (state.flagsStack ??= []);
      flagsStack.push(state.currentFlags);
      // Kept up to date for anything that still reads the most recently saved flags
      state.prevFlags = state.currentFlags;
      if (node.flags) {
        state.currentFlags = getNewCurrentFlags(state.currentFlags, node.flags);
      }
    },
    // Restores the flags that were in effect when this group was entered.
    exit(_, state) {
      const { flagsStack } = state;
      state.currentFlags = flagsStack?.length ? flagsStack.pop() : state.prevFlags;
    }
  },
  Subroutine({ node, parent, replaceWith }, state) {
    const { isRecursive, ref } = node;
    if (isRecursive) {
      // Recursion markers: record the enclosing capturing group whose name or number matches
      let reffed = parent;
      while (reffed = reffed.parent) {
        if (reffed.type === "CapturingGroup" && (reffed.name === ref || reffed.number === ref)) {
          break;
        }
      }
      state.reffedNodesByReferencer.set(node, reffed);
      return;
    }
    const reffedGroupNode = state.subroutineRefMap.get(ref);
    const isGlobalRecursion = ref === 0;
    const expandedSubroutine = isGlobalRecursion ? createRecursion(0) : (
      // The reffed group might itself contain subroutines, which are expanded during sub-traversal
      cloneCapturingGroup(reffedGroupNode, state.groupOriginByCopy, null)
    );
    let replacement = expandedSubroutine;
    if (!isGlobalRecursion) {
      // Flags that apply to the referenced group where it was defined
      const reffedGroupFlagMods = getCombinedFlagModsFromFlagNodes(getAllParents(
        reffedGroupNode,
        (p) => p.type === "Group" && !!p.flags
      ));
      const reffedGroupFlags = reffedGroupFlagMods ? getNewCurrentFlags(state.globalFlags, reffedGroupFlagMods) : state.globalFlags;
      // If they differ from the flags at the call site, wrap the copy in a flag group that
      // reinstates them
      if (!areFlagsEqual(reffedGroupFlags, state.currentFlags)) {
        replacement = createGroup({
          flags: getFlagModsFromFlags(reffedGroupFlags)
        });
        replacement.body[0].body.push(expandedSubroutine);
      }
    }
    replaceWith(setParentDeep(replacement, parent), { traverse: !isGlobalRecursion });
  }
};
// Visitor for the third traversal run by `transform`: renumbers capturing groups, resolves
// backreferences and recursion markers to the new numbers, and pads the pattern with empty
// captures when orphan backreferences point past the last group.
var ThirdPassVisitor = {
  Backreference({ node, parent, replaceWith }, state) {
    if (node.orphan) {
      // Only track the highest number referenced; handled in `Regex.exit`
      state.highestOrphanBackref = Math.max(state.highestOrphanBackref, node.ref);
      return;
    }
    const candidates = state.reffedNodesByReferencer.get(node);
    const participants = candidates.filter((candidate) => canParticipateWithNode(candidate, node));
    if (participants.length === 1) {
      node.ref = participants[0].number;
      return;
    }
    if (participants.length === 0) {
      // No capture can have participated: replace with an empty negated lookaround
      replaceWith(setParentDeep(createLookaroundAssertion({ negate: true }), parent));
      return;
    }
    // Several captures can have participated: try each one, rightmost first, in an atomic group
    const alternatives = [];
    for (let i = participants.length - 1; i >= 0; i--) {
      alternatives.push(createAlternative({
        body: [createBackreference(participants[i].number)]
      }));
    }
    replaceWith(setParentDeep(createGroup({ atomic: true, body: alternatives }), parent));
  },
  CapturingGroup({ node }, state) {
    // Number groups in the order they are visited
    state.numCapturesToLeft += 1;
    node.number = state.numCapturesToLeft;
    const { name } = node;
    if (name && state.groupsByName.get(name).get(node).hasDuplicateNameToRemove) {
      delete node.name;
    }
  },
  Regex: {
    exit({ node }, state) {
      // Append empty captures to the last alternative until the highest orphan ref exists
      const missingCaptures = Math.max(state.highestOrphanBackref - state.numCapturesToLeft, 0);
      let added = 0;
      while (added < missingCaptures) {
        const filler = createCapturingGroup();
        node.body.at(-1).body.push(filler);
        added++;
      }
    }
  },
  Subroutine({ node }, state) {
    // Only recursion markers that target a specific group need their ref updated
    const targetsGroup = node.isRecursive && node.ref !== 0;
    if (targetsGroup) {
      node.ref = state.reffedNodesByReferencer.get(node).number;
    }
  }
};
/**
 * Traverses `root` and stores on every visited node a `parent` property pointing at the
 * parent reported by the traverser.
 */
function addParentProperties(root) {
  const linkToParent = {
    "*": (path) => {
      path.node.parent = path.parent;
    }
  };
  traverse(root, linkToParent);
}
/**
 * Whether two flag objects agree on both `dotAll` and `ignoreCase` (strict equality).
 */
function areFlagsEqual(a, b) {
  return ["dotAll", "ignoreCase"].every((flag) => a[flag] === b[flag]);
}
/**
 * Decides whether `capture` can have participated in the match by the time `node` is reached.
 * Walks from `node` up through its ancestors; at each level (except alternatives) it looks at
 * the siblings to the left of the current point.
 * @returns {boolean} `true` if `capture` is, or is inside, a left sibling at some level;
 *   `false` if the walk reaches `capture` itself or the `Regex` root first.
 * @throws {Error} If the parent chain ends without reaching a `Regex` node.
 */
function canParticipateWithNode(capture, node) {
  let current = node;
  do {
    const { type } = current;
    if (type === "Regex") {
      return false;
    }
    // Alternatives are skipped: sibling alternatives are not "to the left" of the node
    if (type !== "Alternative") {
      if (current === capture) {
        // The node is inside the capture itself
        return false;
      }
      const siblings = getKids(current.parent);
      const ownIndex = siblings.indexOf(current);
      const leftSiblings = ownIndex === -1 ? siblings : siblings.slice(0, ownIndex);
      const foundToLeft = leftSiblings.some(
        (sibling) => sibling === capture || isAncestorOf(sibling, capture)
      );
      if (foundToLeft) {
        return true;
      }
    }
    current = current.parent;
  } while (current);
  throw new Error("Unexpected path");
}
/**
 * Recursively copies `obj` (a node, or an array of nodes), rewiring `parent` properties to
 * point into the copy instead of the original tree.
 *
 * @param {object|Array} obj Value to copy.
 * @param {Map} originMap Receives an entry for every copied `CapturingGroup`, mapping the copy
 *   to the group it ultimately derives from (the existing origin of `obj`, or `obj` itself).
 * @param {object|Array|null} up Copy of the container one level up.
 * @param {object|Array|null} [up2] Copy of the container two levels up; used as the parent
 *   when `up` is an array, since arrays are not nodes.
 * @returns {object|Array} The copy.
 */
function cloneCapturingGroup(obj, originMap, up, up2) {
  const copy = Array.isArray(obj) ? [] : {};
  Object.entries(obj).forEach(([key, value]) => {
    if (key === "parent") {
      copy.parent = Array.isArray(up) ? up2 : up;
      return;
    }
    const isNested = Boolean(value) && typeof value === "object";
    if (isNested) {
      copy[key] = cloneCapturingGroup(value, originMap, copy, up);
      return;
    }
    if (key === "type" && value === "CapturingGroup") {
      originMap.set(copy, originMap.get(obj) ?? obj);
    }
    copy[key] = value;
  });
  return copy;
}
/**
 * Creates a subroutine node for `ref` and marks it with `isRecursive: true`.
 */
function createRecursion(ref) {
  return Object.assign(createSubroutine(ref), { isRecursive: true });
}
/**
 * Collects the ancestors of `node`, nearest first, by following `parent` links until one is
 * falsy. When `filterFn` is given, only ancestors for which it returns a truthy value are kept.
 */
function getAllParents(node, filterFn) {
  const ancestors = [];
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    const isWanted = !filterFn || filterFn(ancestor);
    if (isWanted) {
      ancestors.push(ancestor);
    }
  }
  return ancestors;
}
/**
 * Returns the replacement group name stored in `map` for `name`, creating and storing one on
 * first use. A new name has the form `$<map size>_<name with every disallowed character
 * replaced by "_">`.
 */
function getAndStoreJsGroupName(name, map) {
  const existing = map.has(name);
  if (!existing) {
    const sanitized = name.replace(/^[^$_\p{IDS}]|[^$\u200C\u200D\p{IDC}]/ug, "_");
    map.set(name, `$${map.size}_${sanitized}`);
  }
  return map.get(name);
}
/**
 * Folds the `flags` of the given nodes, in order, into one `{enable?, disable?}` object
 * covering `dotAll` and `ignoreCase`.
 * Enabling a flag clears any earlier disable of it; disabling a flag does not clear an
 * earlier enable.
 * @param {Array<{flags: {enable?: object, disable?: object}}>} flagNodes
 * @returns {{enable?: object, disable?: object} | null} `null` when nothing is enabled or
 *   disabled.
 */
function getCombinedFlagModsFromFlagNodes(flagNodes) {
  const enable = {};
  const disable = {};
  flagNodes.forEach(({ flags }) => {
    for (const prop of ["dotAll", "ignoreCase"]) {
      if (flags.enable?.[prop]) {
        delete disable[prop];
        enable[prop] = true;
      }
      if (flags.disable?.[prop]) {
        disable[prop] = true;
      }
    }
  });
  const combined = {};
  if (Object.keys(enable).length) {
    combined.enable = enable;
  }
  if (Object.keys(disable).length) {
    combined.disable = disable;
  }
  return combined.enable || combined.disable ? combined : null;
}
/**
 * Converts absolute flag values into modifiers: each truthy flag is listed under `enable`,
 * each falsy flag under `disable`. A section is omitted when it would be empty.
 */
function getFlagModsFromFlags({ dotAll, ignoreCase }) {
  const enable = {};
  const disable = {};
  for (const [name, isOn] of [["dotAll", dotAll], ["ignoreCase", ignoreCase]]) {
    const section = isOn ? enable : disable;
    section[name] = true;
  }
  const mods = {};
  if (Object.keys(enable).length > 0) {
    mods.enable = enable;
  }
  if (Object.keys(disable).length > 0) {
    mods.disable = disable;
  }
  return mods;
}
/**
 * Returns the children of `node` as an array: `body` itself when it is an array, a
 * one-element array when `body` is a single truthy value, and `null` otherwise.
 * @throws {Error} If `node` is falsy.
 */
function getKids(node) {
  if (!node) {
    throw new Error("Node expected");
  }
  const { body } = node;
  if (Array.isArray(body)) {
    return body;
  }
  return body ? [body] : null;
}
/**
 * Finds the `search_start` node(s) that lead the given list of elements.
 * Elements that are always zero-length are skipped, except `search_start` itself and a
 * non-negated lookaround that contains only `search_start`.
 * @param {Array<object>} els Elements of one alternative.
 * @returns {object | Array<object> | null} The leading `search_start` node; an array of them
 *   when the first relevant element is a group whose every alternative has one; otherwise `null`.
 */
function getLeadingG(els) {
  const isCandidate = (el) =>
    el.kind === "search_start" || isLoneGLookaround(el, { negate: false }) || !isAlwaysZeroLength(el);
  const candidate = els.find(isCandidate);
  if (!candidate) {
    return null;
  }
  if (candidate.kind === "search_start") {
    return candidate;
  }
  switch (candidate.type) {
    case "LookaroundAssertion":
      // The single child of the lookaround's single alternative
      return candidate.body[0].body[0];
    case "CapturingGroup":
    case "Group": {
      const collected = [];
      for (const { body } of candidate.body) {
        const leading = getLeadingG(body);
        if (!leading) {
          // One alternative without a leading `search_start` disqualifies the whole group
          return null;
        }
        collected.push(...(Array.isArray(leading) ? leading : [leading]));
      }
      return collected;
    }
    default:
      return null;
  }
}
/**
 * Whether `descendant` appears anywhere below `node` (a node is not its own ancestor).
 */
function isAncestorOf(node, descendant) {
  const children = getKids(node) ?? [];
  return children.some((child) => child === descendant || isAncestorOf(child, descendant));
}
/**
 * Whether a node of this `type` is one of the types treated as never consuming input:
 * `Assertion`, `Directive`, or `LookaroundAssertion`.
 */
function isAlwaysZeroLength({ type }) {
  const zeroLengthTypes = ["Assertion", "Directive", "LookaroundAssertion"];
  return zeroLengthTypes.includes(type);
}
/**
 * Truthy when `node` always consumes at least one character: it is a `Character`,
 * `CharacterClass` or `CharacterSet`, or a `Quantifier` with a truthy `min` whose body is one
 * of those types.
 * Note: for quantifiers the result is the raw `&&` value, so a falsy `min` is returned as-is.
 */
function isAlwaysNonZeroLength(node) {
  const consumingTypes = [
    "Character",
    "CharacterClass",
    "CharacterSet"
  ];
  if (consumingTypes.includes(node.type)) {
    return true;
  }
  return node.type === "Quantifier" && node.min && consumingTypes.includes(node.body.type);
}
/**
 * Whether `node` is a lookaround with a single alternative whose only child is a
 * `search_start` assertion.
 * @param {object} node
 * @param {{negate?: boolean | null}} [options] When `negate` is not `null`, the lookaround's
 *   own `negate` must equal it; `null` (the default) accepts either.
 */
function isLoneGLookaround(node, options) {
  const { negate } = {
    negate: null,
    ...options
  };
  if (node.type !== "LookaroundAssertion") {
    return false;
  }
  if (negate !== null && node.negate !== negate) {
    return false;
  }
  if (node.body.length !== 1) {
    return false;
  }
  return hasOnlyChild(node.body[0], {
    type: "Assertion",
    kind: "search_start"
  });
}
/**
 * Whether `name` consists of an identifier-start character (`$`, `_` or `\p{IDS}`) followed
 * by any number of identifier-continue characters (`$`, U+200C, U+200D or `\p{IDC}`).
 */
function isValidJsGroupName(name) {
  const jsGroupNamePattern = /^[$_\p{IDS}][$\u200C\u200D\p{IDC}]*$/u;
  return jsGroupNamePattern.test(name);
}
/**
 * Parses a pattern fragment and returns it as a single node.
 * @param {string} pattern Pattern source to parse.
 * @param {object} [options] Extra parser options; `unicodePropertyMap` is always overridden.
 * @returns {object} The lone element when the fragment has one alternative with at most one
 *   element (`undefined` if that alternative is empty); otherwise a new group containing all
 *   parsed alternatives.
 */
function parseFragment(pattern, options) {
  const parseOptions = {
    ...options,
    // Providing a custom set of Unicode property names avoids converting some JS Unicode
    // properties (ex: `\p{Alpha}`) to Onig POSIX classes
    unicodePropertyMap: JsUnicodePropertyMap
  };
  const { body: alts } = parse(pattern, parseOptions);
  const isSingleElement = alts.length <= 1 && alts[0].body.length <= 1;
  return isSingleElement ? alts[0].body[0] : createGroup({ body: alts });
}
/**
 * Sets `node.negate` to `negate` and returns the same node.
 */
function setNegate(node, negate) {
  return Object.assign(node, { negate });
}
/**
 * Sets `node.parent` to `parent` (the node's own children are not touched) and returns the
 * same node.
 */
function setParent(node, parent) {
  return Object.assign(node, { parent });
}
/**
 * Adds `parent` links throughout the subtree rooted at `node`, then points `node` itself at
 * `parent` and returns it.
 */
function setParentDeep(node, parent) {
  // Links the subtree first; the root's own parent is overwritten right after
  addParentProperties(node);
  return Object.assign(node, { parent });
}
// src/generate.js
import { createAlternative as createAlternative2, createCharacter as createCharacter2, createGroup as createGroup2 } from "oniguruma-parser/parser";
import { traverse as traverse2 } from "oniguruma-parser/traverser";
/**
Generates a JS regex pattern string, a flag string, and an options object from `ast` by dispatching
each node to the handler with the matching `type` key on `generator`.
@param {object} ast Root node. This function reads its `body` (alternatives), `flags`, `options`,
and `_originMap`.
@param {object} [options] Normalized via `getOptions`; `target`, `accuracy`, `verbose`, and
`rules.recursionLimit` are used here.
@returns {{
pattern: string;
flags: string;
options: object;
_captureTransfers: Map<number, Array<number>>;
_hiddenCaptures: Array<number>;
}} `_captureTransfers` maps each `transferTo` value to the capture numbers recorded with it, and
`_hiddenCaptures` lists the capture numbers recorded as hidden.
@throws {Error} If `recursionLimit` isn't an integer from 2 to 20, or a node type has no handler.
*/
function generate(ast, options) {
const opts = getOptions(options);
const minTargetEs2024 = isMinTarget(opts.target, "ES2024");
const minTargetEs2025 = isMinTarget(opts.target, "ES2025");
const recursionLimit = opts.rules.recursionLimit;
if (!Number.isInteger(recursionLimit) || recursionLimit < 2 || recursionLimit > 20) {
throw new Error("Invalid recursionLimit; use 2-20");
}
// Both stay `null` unless the pre-pass below runs and finds cased content
let hasCaseInsensitiveNode = null;
let hasCaseSensitiveNode = null;
if (!minTargetEs2025) {
// Pre-pass for targets below ES2025: walk the tree while tracking the `ignoreCase` state in
// effect (a stack with one entry pushed per group-like node) and record whether nodes flagged by
// the visitor were seen with it on and/or off
const iStack = [ast.flags.ignoreCase];
traverse2(ast, FlagModifierVisitor, {
getCurrentModI: () => iStack.at(-1),
popModI() {
iStack.pop();
},
pushModI(isIOn) {
iStack.push(isIOn);
},
setHasCasedChar() {
if (iStack.at(-1)) {
hasCaseInsensitiveNode = true;
} else {
hasCaseSensitiveNode = true;
}
}
});
}
const appliedGlobalFlags = {
dotAll: ast.flags.dotAll,
// - Turn global flag i on if a case insensitive node was used and no case sensitive nodes were
// used (to avoid unnecessary node expansion).
// - Turn global flag i off if a case sensitive node was used (since case sensitivity can't be
// forced without the use of ES2025 flag groups)
ignoreCase: !!((ast.flags.ignoreCase || hasCaseInsensitiveNode) && !hasCaseSensitiveNode)
};
let lastNode = ast;
// Shared, mutable state handed to every handler on `generator`
const state = {
accuracy: opts.accuracy,
appliedGlobalFlags,
captureMap: /* @__PURE__ */ new Map(),
currentFlags: {
dotAll: ast.flags.dotAll,
ignoreCase: ast.flags.ignoreCase
},
inCharClass: false,
lastNode,
originMap: ast._originMap,
recursionLimit,
// True only when both case-sensitive and case-insensitive content was found in the pre-pass
useAppliedIgnoreCase: !!(!minTargetEs2025 && hasCaseInsensitiveNode && hasCaseSensitiveNode),
useFlagMods: minTargetEs2025,
useFlagV: minTargetEs2024,
verbose: opts.verbose
};
// Dispatches `node` to its handler. While a handler runs, `state.lastNode` is the node that was
// passed to the previous `gen` call
function gen(node) {
state.lastNode = lastNode;
lastNode = node;
const fn = throwIfNullish(generator[node.type], `Unexpected node type "${node.type}"`);
return fn(node, state, gen);
}
const result = {
pattern: ast.body.map(gen).join("|"),
// Could reset `lastNode` at this point via `lastNode = ast`, but it isn't needed by flags
flags: gen(ast.flags),
options: { ...ast.options }
};
if (!minTargetEs2024) {
// NOTE: the spread above is shallow, so `force` and `disable` are the same objects referenced by
// `ast.options`; the changes to them here are visible through `ast.options` as well
delete result.options.force.v;
result.options.disable.v = true;
result.options.unicodeSetsPlugin = null;
}
result._captureTransfers = /* @__PURE__ */ new Map();
result._hiddenCaptures = [];
// Convert the per-capture records written by the `CapturingGroup` handler into the two summaries
state.captureMap.forEach((value, key) => {
if (value.hidden) {
result._hiddenCaptures.push(key);
}
if (value.transferTo) {
getOrInsert(result._captureTransfers, value.transferTo, []).push(key);
}
});
return result;
}
/**
Visitor used by `generate` to learn whether cased content appears while `ignoreCase` is on and/or
off. The `state` passed to every handler must provide `getCurrentModI`, `pushModI`, `popModI`, and
`setHasCasedChar`.
*/
var FlagModifierVisitor = {
  "*": {
    // Entering a group-like node pushes the `ignoreCase` value that applies within it
    enter({ node }, state) {
      if (!isAnyGroup(node)) {
        return;
      }
      const inheritedModI = state.getCurrentModI();
      const nextModI = node.flags ?
        getNewCurrentFlags({ ignoreCase: inheritedModI }, node.flags).ignoreCase :
        inheritedModI;
      state.pushModI(nextModI);
    },
    // Leaving a group-like node pops the value pushed by `enter`
    exit({ node }, state) {
      if (!isAnyGroup(node)) {
        return;
      }
      state.popModI();
    }
  },
  // Backreferences are always reported as cased content
  Backreference(_, state) {
    state.setHasCasedChar();
  },
  Character({ node }, state) {
    const char = cp(node.value);
    if (!charHasCase(char)) {
      return;
    }
    state.setHasCasedChar();
  },
  CharacterClassRange({ node, skip }, state) {
    // Don't descend into the range; it's evaluated as a whole here
    skip();
    const casesOutsideRange = getCasesOutsideCharClassRange(node, { firstOnly: true });
    if (casesOutsideRange.length) {
      state.setHasCasedChar();
    }
  },
  CharacterSet({ node }, state) {
    if (node.kind !== "property") {
      return;
    }
    if (UnicodePropertiesWithSpecificCase.has(node.value)) {
      state.setHasCasedChar();
    }
  }
};
// Maps each node `type` to the function that produces its JS regex source text. `generate` calls
// every handler as `fn(node, state, gen)`, where `state` is the shared mutable state built in
// `generate` and `gen` generates a child node through this same table.
var generator = {
/**
Concatenates the generated source of each element in the alternative.
@param {AlternativeNode} node
*/
Alternative({ body }, _, gen) {
return body.map(gen).join("");
},
/**
Emits `$`, `^`, `\b`, or `\B`. Any other assertion kind throws.
@param {AssertionNode} node
*/
Assertion({ kind, negate }) {
if (kind === "string_end") {
return "$";
}
if (kind === "string_start") {
return "^";
}
if (kind === "word_boundary") {
return negate ? r`\B` : r`\b`;
}
throw new Error(`Unexpected assertion kind "${kind}"`);
},
/**
Emits a numbered backreference (`\N`). Named references throw.
@param {BackreferenceNode} node
*/
Backreference({ ref }, state) {
if (typeof ref !== "number") {
throw new Error("Unexpected named backref in transformed AST");
}
// Without flag modifiers and with strict accuracy, refuse a backref generated while `ignoreCase`
// is on if the referenced group was recorded (by `CapturingGroup`) with it off
if (!state.useFlagMods && state.accuracy === "strict" && state.currentFlags.ignoreCase && !state.captureMap.get(ref).ignoreCase) {
throw new Error("Use of case-insensitive backref to case-sensitive group requires target ES2025 or non-strict accuracy");
}
return "\\" + ref;
},
/**
Emits `(…)` or `(?<name>…)` and records the group in `state.captureMap` under its number.
@param {CapturingGroupNode} node
*/
CapturingGroup(node, state, gen) {
const { body, name, number } = node;
// Remember the `ignoreCase` state in effect at the group, for later `Backreference` checks
const data = { ignoreCase: state.currentFlags.ignoreCase };
const origin = state.originMap.get(node);
if (origin) {
// A group with an entry in `originMap` is marked hidden; when its number is greater than its
// origin's number, its value is additionally marked for transfer to the origin's number
data.hidden = true;
if (number > origin.number) {
data.transferTo = origin.number;
}
}
state.captureMap.set(number, data);
return `(${name ? `?<${name}>` : ""}${body.map(gen).join("|")})`;
},
/**
Emits a single character, escaped via `getCharEscape` when needed. When
`state.useAppliedIgnoreCase` and the current `ignoreCase` state are both on, a cased char is
expanded to the chars returned by `getIgnoreCaseMatchChars` for it.
@param {CharacterNode} node
*/
Character({ value }, state) {
const char = cp(value);
const escaped = getCharEscape(value, {
// A digit is escaped when the previously generated node was a backreference
escDigit: state.lastNode.type === "Backreference",
inCharClass: state.inCharClass,
useFlagV: state.useFlagV
});
if (escaped !== char) {
return escaped;
}
if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase && charHasCase(char)) {
const cases = getIgnoreCaseMatchChars(char);
// Inside a class the cases are listed directly; outside, multiple cases get their own class
return state.inCharClass ? cases.join("") : cases.length > 1 ? `[${cases.join("")}]` : cases[0];
}
return char;
},
/**
Emits a character class. A top-level class may be rewritten as a group of alternatives, and a
nested class may be emitted without its brackets; see the inline comments.
@param {CharacterClassNode} node
*/
CharacterClass(node, state, gen) {
const { kind, negate, parent } = node;
let { body } = node;
if (kind === "intersection" && !state.useFlagV) {
throw new Error("Use of character class intersection requires min target ES2024");
}
// When the environment bug flag is set, replace all literal hyphens in the class with a single
// hyphen placed first
if (envFlags.bugFlagVLiteralHyphenIsRange && state.useFlagV && body.some(isLiteralHyphen)) {
body = [createCharacter2(45), ...body.filter((kid) => !isLiteralHyphen(kid))];
}
// Emits the bracketed class; intersection members are joined with `&&`
const genClass = () => `[${negate ? "^" : ""}${body.map(gen).join(kind === "intersection" ? "&&" : "")}]`;
if (!state.inCharClass) {
if (
// Already established `kind !== 'intersection'` if `!state.useFlagV`; don't check again
(!state.useFlagV || envFlags.bugNestedClassIgnoresNegation) && !negate
) {
const negatedChildClasses = body.filter(
(kid) => kid.type === "CharacterClass" && kid.kind === "union" && kid.negate
);
if (negatedChildClasses.length) {
// Pull the negated child classes out of this class and emit a group instead: its first
// alternative holds what remains of this class (dropped if nothing remains), followed by one
// alternative per extracted class. This mutates `node` and the extracted classes
const group = createGroup2();
const groupFirstAlt = group.body[0];
group.parent = parent;
groupFirstAlt.parent = group;
body = body.filter((kid) => !negatedChildClasses.includes(kid));
node.body = body;
if (body.length) {
node.parent = groupFirstAlt;
groupFirstAlt.body.push(node);
} else {
group.body.pop();
}
negatedChildClasses.forEach((cc) => {
const newAlt = createAlternative2({ body: [cc] });
cc.parent = newAlt;
newAlt.parent = group;
group.body.push(newAlt);
});
return gen(group);
}
}
// Mark that descendants are generated inside a class, then restore once this class is done
state.inCharClass = true;
const result = genClass();
state.inCharClass = false;
return result;
}
const firstEl = body[0];
if (
// Already established that the parent is a char class via `inCharClass`; don't check again
kind === "union" && !negate && firstEl && // Allows many nested classes to work with `target` ES2018 which doesn't support nesting
((!state.useFlagV || !state.verbose) && parent.kind === "union" && !(envFlags.bugFlagVLiteralHyphenIsRange && state.useFlagV) || !state.verbose && parent.kind === "intersection" && // JS doesn't allow intersection with union or ranges
body.length === 1 && firstEl.type !== "CharacterClassRange")
) {
// Unwrap: emit the nested class's members without surrounding brackets
return body.map(gen).join("");
}
if (!state.useFlagV && parent.type === "CharacterClass") {
throw new Error("Uses nested character class in a way that requires min target ES2024");
}
return genClass();
},
/**
Emits `min-max`. When `state.useAppliedIgnoreCase` and the current `ignoreCase` state are both
on, the chars returned by `getCasesOutsideCharClassRange` are appended after the range.
@param {CharacterClassRangeNode} node
*/
CharacterClassRange(node, state) {
const min = node.min.value;
const max = node.max.value;
const escOpts = {
escDigit: false,
inCharClass: true,
useFlagV: state.useFlagV
};
const minStr = getCharEscape(min, escOpts);
const maxStr = getCharEscape(max, escOpts);
// A set, so identical generated strings are emitted only once
const extraChars = /* @__PURE__ */ new Set();
if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase) {
const charsOutsideRange = getCasesOutsideCharClassRange(node);
const ranges = getCodePointRangesFromChars(charsOutsideRange);
ranges.forEach((value) => {
extraChars.add(
Array.isArray(value) ? `${getCharEscape(value[0], escOpts)}-${getCharEscape(value[1], escOpts)}` : getCharEscape(value, escOpts)
);
});
}
return `${minStr}-${maxStr}${[...extraChars].join("")}`;
},
/**
Emits the text for kinds `dot`, `digit`, `property`, and `word`. Any other kind throws.
@param {CharacterSetNode} node
*/
CharacterSet({ kind, negate, value, key }, state) {
if (kind === "dot") {
// With `dotAll` currently on: `.` if the global `s` flag is applied or flag modifiers are in
// use, else `[^]`
return state.currentFlags.dotAll ? state.appliedGlobalFlags.dotAll || state.useFlagMods ? "." : "[^]" : (
// Onig's only line break char is line feed, unlike JS
r`[^\n]`
);
}
if (kind === "digit") {
return negate ? r`\D` : r`\d`;
}
if (kind === "property") {
if (state.useAppliedIgnoreCase && state.currentFlags.ignoreCase && UnicodePropertiesWithSpecificCase.has(value)) {
throw new Error(`Unicode property "${value}" can't be case-insensitive when other chars have specific case`);
}
return `${negate ? r`\P` : r`\p`}{${key ? `${key}=` : ""}${value}}`;
}
if (kind === "word") {
return negate ? r`\W` : r`\w`;
}
throw new Error(`Unexpected character set kind "${kind}"`);
},
/**
Emits the flag string, drawn from `i`, `s`, and `y` in that order. Flag `i` comes from
`state.appliedGlobalFlags` rather than from the node.
@param {FlagsNode} node
*/
Flags(node, state) {
return (
// The transformer should never turn on the properties for flags d, g, m since Onig doesn't
// have equivs. Flag m is never used since Onig uses different line break chars than JS
// (node.hasIndices ? 'd' : '') +
// (node.global ? 'g' : '') +
// (node.multiline ? 'm' : '') +
(state.appliedGlobalFlags.ignoreCase ? "i" : "") + (node.dotAll ? "s" : "") + (node.sticky ? "y" : "")
);
},
/**
Emits a noncapturing, atomic, or flag-modifier group, or just the group's contents when the
wrapper can be omitted. The group's flags apply to `state.currentFlags` only while its contents
are generated.
@param {GroupNode} node
*/
Group({ atomic: atomic2, body, flags, parent }, state, gen) {
const currentFlags = state.currentFlags;
if (flags) {
state.currentFlags = getNewCurrentFlags(currentFlags, flags);
}
const contents = body.map(gen).join("|");
// The wrapper is omitted only in non-verbose mode for a single-alternative, non-atomic group
// that isn't the body of a quantifier and has no flags that would be emitted as modifiers
const result = !state.verbose && body.length === 1 && // Single alt
parent.type !== "Quantifier" && !atomic2 && (!state.useFlagMods || !flags) ? contents : `(?${getGroupPrefix(atomic2, flags, state.useFlagMods)}${contents})`;
// Restore the flags that were in effect before this group
state.currentFlags = currentFlags;
return result;
},
/**
Emits `(?=…)`, `(?!…)`, `(?<=…)`, or `(?<!…)`. Any `kind` other than `lookahead` gets the
lookbehind form.
@param {LookaroundAssertionNode} node
*/
LookaroundAssertion({ body, kind, negate }, _, gen) {
const prefix = `${kind === "lookahead" ? "" : "<"}${negate ? "!" : "="}`;
return `(?${prefix}${body.map(gen).join("|")})`;
},
/**
Emits the quantified element followed by the string from `getQuantifierStr`.
@param {QuantifierNode} node
*/
Quantifier(node, _, gen) {
return gen(node.body) + getQuantifierStr(node);
},
/**
Emits `(?R=limit)` when `ref` is `0`, otherwise `\g<ref&R=limit>`, with `limit` taken from
`state.recursionLimit`. Non-recursive subroutines throw.
@param {SubroutineNode & {isRecursive: true}} node
*/
Subroutine({ isRecursive, ref }, state) {
if (!isRecursive) {
throw new Error("Unexpected non-recursive subroutine in transformed AST");
}
const limit = state.recursionLimit;
return ref === 0 ? `(?R=${limit})` : r`\g<${ref}&R=${limit}>`;
}
};
// Chars that `getCharEscape` prefixes with a backslash outside of character classes. A `Set` built
// from a string gets one entry per char.
var BaseEscapeChars = /* @__PURE__ */ new Set("$()*+.?[\\]^{|}");
// Chars that `getCharEscape` prefixes with a backslash inside character classes when flag v isn't
// used. The final `[` doesn't require escaping with flag u, but escaping it can help work around
// regex source linters and regex syntax processors that expect unescaped `[` to create a nested
// class.
var CharClassEscapeChars = /* @__PURE__ */ new Set("-\\]^[");
// Chars that `getCharEscape` prefixes with a backslash inside character classes when flag v is
// used.
var CharClassEscapeCharsFlagV = /* @__PURE__ */ new Set([
  ..."()-/[\\]^{|}",
  // Double punctuators; also includes already-listed `-` and `^`
  ..."!#$%&*+,.:;<=>?@`~"
]);
// Code points that `getCharEscape` always replaces with the given escape sequence, keyed by code
// point (written here in hex).
var CharCodeEscapeMap = /* @__PURE__ */ new Map([
  [0x09, r`\t`], // horizontal tab
  [0x0A, r`\n`], // line feed
  [0x0B, r`\v`], // vertical tab
  [0x0C, r`\f`], // form feed
  [0x0D, r`\r`], // carriage return
  [0x2028, r`\u2028`], // line separator
  [0x2029, r`\u2029`], // paragraph separator
  [0xFEFF, r`\uFEFF`] // ZWNBSP/BOM
]);
// Matches a string that is exactly one code point with the Unicode `Cased` property
var casedRe = /^\p{Cased}$/u;
/**
Reports whether `char` is a single code point with the Unicode `Cased` property.
@param {string} char
@returns {boolean}
*/
function charHasCase(char) {
  // The regex has no `g`/`y` flag, so `exec` is stateless here
  return casedRe.exec(char) !== null;
}
/**
Collects, for every cased char within the range, the chars from `getIgnoreCaseMatchChars` whose
code points fall outside the range. The result may contain duplicates.
@param {{min: {value: number}, max: {value: number}}} node Range with inclusive code point bounds.
@param {{firstOnly?: boolean}} [options] With `firstOnly`, scanning stops after the first char in
the range that contributes anything.
@returns {Array<string>}
*/
function getCasesOutsideCharClassRange(node, options) {
  const stopAtFirstHit = Boolean(options?.firstOnly);
  const lo = node.min.value;
  const hi = node.max.value;
  const outside = [];
  // Shortcut for some very large ranges, which are not scanned at all: a start below `A` (65) with
  // an end of exactly U+FFFF or of at least U+1FFFF, or a start of exactly U+10000 with an end of
  // at least U+1FFFF
  const startsBelowUpperA = lo < 65;
  const endsAtOrPastPlane1 = hi >= 131071;
  if (startsBelowUpperA && (hi === 65535 || endsAtOrPastPlane1)) {
    return outside;
  }
  if (lo === 65536 && endsAtOrPastPlane1) {
    return outside;
  }
  for (let codePoint = lo; codePoint <= hi; codePoint++) {
    const char = cp(codePoint);
    if (charHasCase(char)) {
      const casesOutside = getIgnoreCaseMatchChars(char).filter((caseOfChar) => {
        const caseCodePoint = caseOfChar.codePointAt(0);
        return caseCodePoint < lo || caseCodePoint > hi;
      });
      if (casesOutside.length) {
        outside.push(...casesOutside);
        if (stopAtFirstHit) {
          break;
        }
      }
    }
  }
  return outside;
}
/**
Returns the regex source text for a literal code point, escaping it where needed.
@param {number} codePoint
@param {{escDigit: boolean, inCharClass: boolean, useFlagV: boolean}} options `escDigit` forces
ASCII digits to be hex-escaped; `inCharClass` and `useFlagV` select which set of syntax chars gets
a backslash prefix.
@returns {string}
*/
function getCharEscape(codePoint, { escDigit, inCharClass, useFlagV }) {
  if (CharCodeEscapeMap.has(codePoint)) {
    return CharCodeEscapeMap.get(codePoint);
  }
  // Control chars, etc.; condition modeled on the Chrome developer console's display for strings
  const isControlLike = codePoint < 32 || (codePoint > 126 && codePoint < 160);
  // Unicode planes 4-16; unassigned, special purpose, and private use area
  const isInHighPlane = codePoint > 262143;
  // Avoid corrupting a preceding backref by immediately following it with a literal digit
  const needsNumericEscape = isControlLike || isInHighPlane || (escDigit && isDigitCharCode(codePoint));
  if (needsNumericEscape) {
    const hex = codePoint.toString(16).toUpperCase();
    if (codePoint > 255) {
      return `\\u{${hex}}`;
    }
    return `\\x${hex.padStart(2, "0")}`;
  }
  let escapeChars = BaseEscapeChars;
  if (inCharClass) {
    escapeChars = useFlagV ? CharClassEscapeCharsFlagV : CharClassEscapeChars;
  }
  const char = cp(codePoint);
  return escapeChars.has(char) ? `\\${char}` : char;
}
/**
Condenses a list of chars into sorted code point values, merging each run of consecutive code
points into a range.
@param {Array<string>} chars Chars in any order; duplicates are allowed and collapsed. Only the
first code point of each string is used.
@returns {Array<number | [number, number]>} In ascending order, either a lone code point or a
`[first, last]` pair (inclusive) for each run of two or more consecutive code points.
*/
function getCodePointRangesFromChars(chars) {
  // Dedupe first: a repeated code point would otherwise break a run in two and lead to redundant,
  // overlapping entries (e.g. `[[97, 98], [98, 99]]` rather than `[[97, 99]]`)
  const uniqueCodePoints = new Set(chars.map((char) => char.codePointAt(0)));
  const codePoints = [...uniqueCodePoints].sort((a, b) => a - b);
  const values = [];
  let i = 0;
  while (i < codePoints.length) {
    // Extend `runEnd` for as long as the next code point is adjacent
    let runEnd = i;
    while (runEnd + 1 < codePoints.length && codePoints[runEnd + 1] === codePoints[runEnd] + 1) {
      runEnd++;
    }
    values.push(runEnd === i ? codePoints[i] : [codePoints[i], codePoints[runEnd]]);
    i = runEnd + 1;
  }
  return values;
}
/**
Builds the text that follows `(?` when opening a non-capturing group.
@param {boolean} atomic2 Whether the group is atomic; if so the prefix is `>`.
@param {{enable?: {ignoreCase?: boolean, dotAll?: boolean}, disable?: {ignoreCase?: boolean, dotAll?: boolean}} | null | undefined} flagMods
Flags the group turns on or off.
@param {boolean} useFlagMods Whether flag modifiers may be emitted at all.
@returns {string} `>`, `:`, or modifiers followed by `:` (e.g. `i:`, `i-s:`, `-is:`).
*/
function getGroupPrefix(atomic2, flagMods, useFlagMods) {
  if (atomic2) {
    return ">";
  }
  let mods = "";
  if (flagMods && useFlagMods) {
    const { enable, disable } = flagMods;
    const enabled = (enable?.ignoreCase ? "i" : "") + (enable?.dotAll ? "s" : "");
    const disabled = (disable?.ignoreCase ? "i" : "") + (disable?.dotAll ? "s" : "");
    // Only emit `-` when at least one flag is actually turned off. A `disable` object without
    // `ignoreCase`/`dotAll` would otherwise leave a dangling `-`, and `(?-:` (no flags on either
    // side) is a syntax error in JS
    mods = enabled + (disabled ? `-${disabled}` : "");
  }
  return `${mods}:`;
}
/**
Builds the quantifier text for a quantifier node: `?`, `*`, `+`, `{n}`, `{n,}`, or `{n,m}`,
followed by a suffix chosen by `kind` (`?` for lazy, `+` for possessive, nothing for greedy).
@param {{kind: string, max: number, min: number}} node
@returns {string}
*/
function getQuantifierStr({ kind, max, min }) {
  const suffixByKind = {
    greedy: "",
    lazy: "?",
    possessive: "+"
  };
  const isUnbounded = max === Infinity;
  const getBase = () => {
    if (!min) {
      if (max === 1) {
        return "?";
      }
      if (isUnbounded) {
        return "*";
      }
    }
    if (min === 1 && isUnbounded) {
      return "+";
    }
    if (min === max) {
      return `{${min}}`;
    }
    return `{${min},${isUnbounded ? "" : max}}`;
  };
  return getBase() + suffixByKind[kind];
}
/**
Reports whether a node is a `CapturingGroup`, `Group`, or `LookaroundAssertion`.
@param {{type: string}} node
@returns {boolean}
*/
function isAnyGroup(node) {
  const groupTypes = ["CapturingGroup", "Group", "LookaroundAssertion"];
  return groupTypes.includes(node.type);
}
/**
Reports whether `value` lies strictly between 47 and 58, i.e. the code points of ASCII `0`-`9`.
@param {number} value
@returns {boolean}
*/
function isDigitCharCode(value) {
  return 47 < value && value < 58;
}
/**
Reports whether a node is a `Character` whose value is 45, the code point of `-`.
@param {{type: string, value?: number}} node
@returns {boolean}
*/
function isLiteralHyphen(node) {
  if (node.type !== "Character") {
    return false;
  }
  return node.value === 45;
}
// src/subclass.js
/**
`RegExp` subclass that post-processes match results (removing hidden captures and transferring
capture values), optionally defers compiling its pattern until first use, and supports the
`clip_search` strategy.
*/
var EmulatedRegExp = class _EmulatedRegExp extends RegExp {
  /**
  Per-capture handling, keyed by the capture's number in the underlying pattern.
  @type {Map<number, {
    hidden?: true;
    transferTo?: number;
  }>}
  */
  #captureMap = /* @__PURE__ */ new Map();
  /**
  The regex that native matching runs on: this instance itself when compiled eagerly; otherwise
  `null` until the first `exec` call, then a separately constructed instance.
  @type {RegExp | EmulatedRegExp | null}
  */
  #compiled = null;
  /**
  The pattern as provided (or the `source` of the copied regex).
  @type {string}
  */
  #pattern;
  /**
  Capture number to group name, built on demand by `createNameMap`.
  @type {Map<number, string>?}
  */
  #nameMap = null;
  /**
  @type {string?}
  */
  #strategy = null;
  /**
  Can be used to serialize the instance.
  @type {EmulatedRegExpOptions}
  */
  rawOptions = {};
  // Override the getter with one that works with lazy-compiled regexes
  get source() {
    return this.#pattern || "(?:)";
  }
  /**
  @overload
  @param {string} pattern
  @param {string} [flags]
  @param {EmulatedRegExpOptions} [options]
  */
  /**
  @overload
  @param {EmulatedRegExp} pattern
  @param {string} [flags]
  */
  constructor(pattern, flags, options) {
    let lazyCompile = !!options?.lazyCompile;
    if (pattern instanceof RegExp) {
      // Copying a regex; this path is also taken when native methods such as `matchAll` and `split`
      // clone the regex via its constructor
      if (options) {
        throw new Error("Cannot provide options when copying a regexp");
      }
      const re = pattern;
      super(re, flags);
      this.#pattern = re.source;
      if (re instanceof _EmulatedRegExp) {
        this.#captureMap = re.#captureMap;
        this.#nameMap = re.#nameMap;
        this.#strategy = re.#strategy;
        this.rawOptions = re.rawOptions;
        // A lazily compiled instance is natively backed by an empty placeholder pattern, and that
        // placeholder is what `super(re, flags)` copied. Keep the copy lazy too, so that it
        // compiles the real pattern on first use rather than matching the placeholder
        lazyCompile = re.#compiled !== re;
      }
    } else {
      const opts = {
        hiddenCaptures: [],
        strategy: null,
        transfers: [],
        ...options
      };
      // With lazy compilation, the native regex gets an empty placeholder pattern
      super(lazyCompile ? "" : pattern, flags);
      this.#pattern = pattern;
      this.#captureMap = createCaptureMap(opts.hiddenCaptures, opts.transfers);
      this.#strategy = opts.strategy;
      this.rawOptions = options ?? {};
    }
    if (!lazyCompile) {
      this.#compiled = this;
    }
  }
  /**
  Called internally by all String/RegExp methods that use regexes.
  @override
  @param {string} str
  @returns {RegExpExecArray?}
  */
  exec(str) {
    if (!this.#compiled) {
      // First use of a lazily compiled regex: build the real regex now, without the lazy option
      const { lazyCompile, ...rest } = this.rawOptions;
      this.#compiled = new _EmulatedRegExp(this.#pattern, this.flags, rest);
    }
    const useLastIndex = this.global || this.sticky;
    const pos = this.lastIndex;
    if (this.#strategy === "clip_search" && useLastIndex && pos) {
      // Search a copy of the string that starts at `pos`, then shift the results back by `pos`
      this.lastIndex = 0;
      const match = this.#execCore(str.slice(pos));
      if (match) {
        adjustMatchDetailsForOffset(match, pos, str, this.hasIndices);
        this.lastIndex += pos;
      }
      return match;
    }
    return this.#execCore(str);
  }
  /**
  Adds support for hidden and transfer captures.
  @param {string} str
  @returns {RegExpExecArray?}
  */
  #execCore(str) {
    // Matching runs natively on `#compiled`; `lastIndex` is synced to it before and back after
    this.#compiled.lastIndex = this.lastIndex;
    const match = super.exec.call(this.#compiled, str);
    this.lastIndex = this.#compiled.lastIndex;
    if (!match || !this.#captureMap.size) {
      return match;
    }
    // Rebuild the captures on the same match object, leaving out hidden ones
    const matchCopy = [...match];
    match.length = 1;
    let indicesCopy;
    if (this.hasIndices) {
      indicesCopy = [...match.indices];
      match.indices.length = 1;
    }
    // Maps each original capture number to its position in the rebuilt match (`null` if hidden)
    const mappedNums = [0];
    for (let i = 1; i < matchCopy.length; i++) {
      const { hidden, transferTo } = this.#captureMap.get(i) ?? {};
      if (hidden) {
        mappedNums.push(null);
      } else {
        mappedNums.push(match.length);
        match.push(matchCopy[i]);
        if (this.hasIndices) {
          match.indices.push(indicesCopy[i]);
        }
      }
      // Only participating captures overwrite their transfer target
      if (transferTo && matchCopy[i] !== void 0) {
        const to = mappedNums[transferTo];
        // Falsy if the target is hidden (`null`) or hasn't been mapped yet (`undefined`)
        if (!to) {
          throw new Error(`Invalid capture transfer to "${transferTo}"`);
        }
        match[to] = matchCopy[i];
        if (this.hasIndices) {
          match.indices[to] = indicesCopy[i];
        }
        if (match.groups) {
          if (!this.#nameMap) {
            this.#nameMap = createNameMap(this.source);
          }
          const name = this.#nameMap.get(transferTo);
          if (name) {
            match.groups[name] = matchCopy[i];
            if (this.hasIndices) {
              match.indices.groups[name] = indicesCopy[i];
            }
          }
        }
      }
    }
    return match;
  }
};
/**
Shifts a match's positions by `offset` and replaces its `input`, mutating `match` in place. With
`hasIndices`, every present `[start, end]` pair in `match.indices` and in `match.indices.groups` is
replaced by a new, shifted pair.
@param {RegExpExecArray} match
@param {number} offset
@param {string} input
@param {boolean} hasIndices
@returns {void}
*/
function adjustMatchDetailsForOffset(match, offset, input, hasIndices) {
  match.index += offset;
  match.input = input;
  if (!hasIndices) {
    return;
  }
  const shiftPair = (pair) => [pair[0] + offset, pair[1] + offset];
  const { indices } = match;
  indices.forEach((pair, i) => {
    // Nonparticipating captures have no pair
    if (pair) {
      indices[i] = shiftPair(pair);
    }
  });
  const namedIndices = indices.groups;
  if (!namedIndices) {
    return;
  }
  for (const name of Object.keys(namedIndices)) {
    const pair = namedIndices[name];
    if (pair) {
      namedIndices[name] = shiftPair(pair);
    }
  }
}
/**
Builds the per-capture lookup used by `EmulatedRegExp`.
@param {Iterable<number>} hiddenCaptures Capture numbers to mark with `hidden: true`.
@param {Iterable<[number, Iterable<number>]>} transfers Pairs of a target capture number and the
capture numbers to mark with `transferTo` set to that target.
@returns {Map<number, {hidden?: true, transferTo?: number}>}
*/
function createCaptureMap(hiddenCaptures, transfers) {
  const captureMap = /* @__PURE__ */ new Map(
    Array.from(hiddenCaptures, (num) => [num, { hidden: true }])
  );
  for (const [target, sources] of transfers) {
    for (const source of sources) {
      // Reuses the entry of a capture that is also hidden
      const entry = getOrInsert(captureMap, source, {});
      entry.transferTo = target;
    }
  }
  return captureMap;
}
/**
Scans regex source text and maps the number of each named capturing group to its name. Capture
numbers count every capturing group (named or not) that opens outside of a character class.
@param {string} pattern
@returns {Map<number, string>}
*/
function createNameMap(pattern) {
  // Each token is either the opening of a capturing group or a (possibly backslash-escaped) char
  const tokenRe = /(?<capture>\((?:\?<(?![=!])(?<name>[^>]+)>|(?!\?)))|\\?./gsu;
  const nameByNumber = /* @__PURE__ */ new Map();
  let classDepth = 0;
  let captureCount = 0;
  for (let match = tokenRe.exec(pattern); match !== null; match = tokenRe.exec(pattern)) {
    const token = match[0];
    const { capture, name } = match.groups;
    if (token === "[") {
      classDepth++;
      continue;
    }
    if (classDepth !== 0) {
      // Within a character class, only a closing bracket matters
      if (token === "]") {
        classDepth--;
      }
      continue;
    }
    if (!capture) {
      continue;
    }
    captureCount++;
    if (name) {
      nameByNumber.set(captureCount, name);
    }
  }
  return nameByNumber;
}
// src/index.js
import { parse as parse2 } from "oniguruma-parser/parser";
import { atomic, possessive } from "regex/internals";
import { recursion } from "regex-recursion";
/**
Transpiles `pattern` via `toRegExpDetails` and constructs a regex from the result.
@param {string} pattern
@param {object} [options] Passed through to `toRegExpDetails`.
@returns {RegExp | EmulatedRegExp} An `EmulatedRegExp` when the details include `options`,
otherwise a native `RegExp`.
*/
function toRegExp(pattern, options) {
  const {
    pattern: source,
    flags,
    options: subclassOptions
  } = toRegExpDetails(pattern, options);
  return subclassOptions ?
    new EmulatedRegExp(source, flags, subclassOptions) :
    new RegExp(source, flags);
}
/**
Runs the full pipeline on `pattern` (parse, transform, generate, then the `recursion`,
`possessive`, and `atomic` post-processors, in that order) and returns what's needed to construct
the resulting regex.
@param {string} pattern
@param {object} [options] Normalized via `getOptions`.
@returns {{pattern: string, flags: string, options?: object}} `options` is present only when
subclass features are needed (hidden captures, capture transfers, a strategy, or lazy compilation)
and `avoidSubclass` is off.
@throws {Error} If `avoidSubclass` is on and `lazyCompileLength` isn't `Infinity`.
*/
function toRegExpDetails(pattern, options) {
  const opts = getOptions(options);
  const { rules } = opts;
  const onigurumaAst = parse2(pattern, {
    flags: opts.flags,
    normalizeUnknownPropertyNames: true,
    rules: {
      captureGroup: rules.captureGroup,
      singleline: rules.singleline
    },
    skipBackrefValidation: rules.allowOrphanBackrefs,
    unicodePropertyMap: JsUnicodePropertyMap
  });
  const regexPlusAst = transform(onigurumaAst, {
    accuracy: opts.accuracy,
    asciiWordBoundaries: rules.asciiWordBoundaries,
    avoidSubclass: opts.avoidSubclass,
    bestEffortTarget: opts.target
  });
  const generated = generate(regexPlusAst, opts);
  // Each post-processor receives the pattern produced by the previous step; capture bookkeeping is
  // passed from `generate` to `recursion` and from `recursion` to `atomic`
  const afterRecursion = recursion(generated.pattern, {
    captureTransfers: generated._captureTransfers,
    hiddenCaptures: generated._hiddenCaptures,
    mode: "external"
  });
  const afterPossessive = possessive(afterRecursion.pattern);
  const afterAtomic = atomic(afterPossessive.pattern, {
    captureTransfers: afterRecursion.captureTransfers,
    hiddenCaptures: afterRecursion.hiddenCaptures
  });
  const callerFlags = `${opts.hasIndices ? "d" : ""}${opts.global ? "g" : ""}`;
  const unicodeFlag = generated.options.disable.v ? "u" : "v";
  const details = {
    pattern: afterAtomic.pattern,
    flags: `${callerFlags}${generated.flags}${unicodeFlag}`
  };
  if (opts.avoidSubclass) {
    if (opts.lazyCompileLength !== Infinity) {
      throw new Error("Lazy compilation requires subclass");
    }
    return details;
  }
  // Note: sorts the array from the `atomic` result in place
  const hiddenCaptures = afterAtomic.hiddenCaptures.sort((a, b) => a - b);
  const transfers = Array.from(afterAtomic.captureTransfers);
  const strategy = regexPlusAst._strategy;
  const lazyCompile = details.pattern.length >= opts.lazyCompileLength;
  if (hiddenCaptures.length || transfers.length || strategy || lazyCompile) {
    // Include only the options that are in use
    const subclassOptions = {};
    if (hiddenCaptures.length) {
      subclassOptions.hiddenCaptures = hiddenCaptures;
    }
    if (transfers.length) {
      subclassOptions.transfers = transfers;
    }
    if (strategy) {
      subclassOptions.strategy = strategy;
    }
    if (lazyCompile) {
      subclassOptions.lazyCompile = lazyCompile;
    }
    details.options = subclassOptions;
  }
  return details;
}
export {
EmulatedRegExp,
toRegExp,
toRegExpDetails
};
//# sourceMappingURL=index.js.map