Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions libregexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,15 @@ typedef enum {

#define TMP_BUF_SIZE 128

// invariant: is_unicode ^ unicode_sets (or neither, but not both)
typedef struct {
DynBuf byte_code;
const uint8_t *buf_ptr;
const uint8_t *buf_end;
const uint8_t *buf_start;
int re_flags;
BOOL is_unicode;
BOOL unicode_sets;
BOOL ignore_case;
BOOL dotall;
int capture_count;
Expand Down Expand Up @@ -853,6 +855,8 @@ static int re_emit_range(REParseState *s, const CharRange *cr)
return 0;
}

// s->unicode turns patterns like []] into syntax errors
// s->unicode_sets turns more patterns into errors, like [a-] or [[]
static int re_parse_char_class(REParseState *s, const uint8_t **pp)
{
const uint8_t *p;
Expand All @@ -864,17 +868,43 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
cr_init(cr, s->opaque, lre_realloc);
p = *pp;
p++; /* skip '[' */

if (s->unicode_sets) {
static const char verboten[] =
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it german? 😅

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or Dutch ;-)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it german? 😅

To make it sound more affirmative? Using the French interdit would definitely sound less absolute.

As explained in this quote is attributed to Sir Winston Churchill:

In England, everything is permitted except what is forbidden.
In Germany, everything is forbidden except what is permitted.
In France, everything is allowed, even what is prohibited.
In the USSR, everything is prohibited, even what is permitted.

Copy link
Collaborator

@chqrlie chqrlie Mar 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or Dutch ;-)

No, it would be verboden in Dutch, unless a local dialect in the northern part of Holland is closer to German.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dialect around here is in fact very close to Low German but really the reason I used "verboten" is because I'm a big fan of German as spoken (well...) in that one scene from the movie Der Untergang :)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but really the reason I used "verboten" is because I'm a big fan of German

Okay 😂 Nice one

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dann können wir hier auf Deutsch diskutieren. Kannst to auch Saúl ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No chance 😅

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tut mir Leid... let's switch back to the lingua franca :)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Na gut, akzeptiert xD

"()[{}/-|" "\0"
"&&!!##$$%%**++,,..::;;<<==>>??@@``~~" "\0"
"^^^_^^";
const char *s = verboten;
int n = 1;
do {
if (!memcmp(s, p, n))
if (p[n] == ']')
goto invalid_class_range;
s += n;
if (!*s) {
s++;
n++;
}
} while (n < 4);
}

invert = FALSE;
if (*p == '^') {
p++;
invert = TRUE;
}

for(;;) {
if (*p == ']')
break;
c1 = get_class_atom(s, cr1, &p, TRUE);
if ((int)c1 < 0)
goto fail;
if (*p == '-' && p[1] == ']' && s->unicode_sets) {
if (c1 >= CLASS_RANGE_BASE)
cr_free(cr1);
goto invalid_class_range;
}
if (*p == '-' && p[1] != ']') {
const uint8_t *p0 = p + 1;
if (c1 >= CLASS_RANGE_BASE) {
Expand Down Expand Up @@ -1843,6 +1873,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
is_sticky = ((re_flags & LRE_FLAG_STICKY) != 0);
s->ignore_case = ((re_flags & LRE_FLAG_IGNORECASE) != 0);
s->dotall = ((re_flags & LRE_FLAG_DOTALL) != 0);
s->unicode_sets = ((re_flags & LRE_FLAG_UNICODE_SETS) != 0);
s->capture_count = 1;
s->total_capture_count = -1;
s->has_named_captures = -1;
Expand Down
2 changes: 1 addition & 1 deletion libregexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
#define LRE_FLAG_UNICODE (1 << 4)
#define LRE_FLAG_STICKY (1 << 5)
#define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */

#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
#define LRE_FLAG_UNICODE_SETS (1 << 8)

uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
const char *buf, size_t buf_len, int re_flags,
Expand Down
13 changes: 13 additions & 0 deletions quickjs.c
Original file line number Diff line number Diff line change
Expand Up @@ -40730,6 +40730,9 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern,
case 'u':
mask = LRE_FLAG_UNICODE;
break;
case 'v':
mask = LRE_FLAG_UNICODE_SETS;
break;
case 'y':
mask = LRE_FLAG_STICKY;
break;
Expand All @@ -40746,6 +40749,10 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValue pattern,
JS_FreeCString(ctx, str);
}

if (re_flags & LRE_FLAG_UNICODE)
if (re_flags & LRE_FLAG_UNICODE_SETS)
return JS_ThrowSyntaxError(ctx, "invalid regular expression flags");

str = JS_ToCStringLen2(ctx, &len, pattern, !(re_flags & LRE_FLAG_UNICODE));
if (!str)
return JS_EXCEPTION;
Expand Down Expand Up @@ -41067,6 +41074,11 @@ static JSValue js_regexp_get_flags(JSContext *ctx, JSValue this_val)
goto exception;
if (res)
*p++ = 'u';
res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "unicodeSets"));
if (res < 0)
goto exception;
if (res)
*p++ = 'v';
res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "sticky"));
if (res < 0)
goto exception;
Expand Down Expand Up @@ -42152,6 +42164,7 @@ static const JSCFunctionListEntry js_regexp_proto_funcs[] = {
JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, LRE_FLAG_MULTILINE ),
JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, LRE_FLAG_DOTALL ),
JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, LRE_FLAG_UNICODE ),
JS_CGETSET_MAGIC_DEF("unicodeSets", js_regexp_get_flag, NULL, LRE_FLAG_UNICODE_SETS ),
JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, LRE_FLAG_STICKY ),
JS_CGETSET_MAGIC_DEF("hasIndices", js_regexp_get_flag, NULL, LRE_FLAG_INDICES ),
JS_CFUNC_DEF("exec", 1, js_regexp_exec ),
Expand Down
144 changes: 143 additions & 1 deletion test262.conf
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ regexp-lookbehind
regexp-match-indices
regexp-named-groups
regexp-unicode-property-escapes
regexp-v-flag=skip
regexp-v-flag
resizable-arraybuffer=skip
rest-parameters
Set
Expand Down Expand Up @@ -223,5 +223,147 @@ test262/test/built-ins/ThrowTypeError/unique-per-realm-function-proto.js
#test262/test/built-ins/RegExp/CharacterClassEscapes/
#test262/test/built-ins/RegExp/property-escapes/

# in progress regexp-v-flag support, see https://github.com/quickjs-ng/quickjs/issues/228
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-CharacterClass.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-P.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-u.js
test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-escape-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-class-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-property-escape-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/character-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-13.1.js
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-14.0.js
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-15.0.js
test262/test/built-ins/RegExp/unicodeSets/generated/rgi-emoji-15.1.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-difference-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-intersection-string-literal.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character-property-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-character.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-property-of-strings-escape.js
test262/test/built-ins/RegExp/unicodeSets/generated/string-literal-union-string-literal.js

[tests]
# list test files or use config.testdir
11 changes: 11 additions & 0 deletions tests/test_builtin.js
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,17 @@ function test_regexp()
assert(/{1a}/.toString(), "/{1a}/");
a = /a{1+/.exec("a{11");
assert(a, ["a{11"] );

eval("/[a-]/"); // accepted with no flag
eval("/[a-]/u"); // accepted with 'u' flag

let ex;
try {
eval("/[a-]/v"); // rejected with 'v' flag
} catch (_ex) {
ex = _ex;
}
assert(ex?.message, "invalid class range");
}

function test_symbol()
Expand Down