Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gen/function_source.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
const uint32_t qjsc_function_source_size = 384;

const uint8_t qjsc_function_source[384] = {
0x0d, 0x06, 0x0c, 0x61, 0x63, 0x74, 0x75, 0x61,
0x0e, 0x06, 0x0c, 0x61, 0x63, 0x74, 0x75, 0x61,
0x6c, 0x02, 0x66, 0x30, 0x74, 0x65, 0x73, 0x74,
0x73, 0x2f, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69,
0x6f, 0x6e, 0x5f, 0x73, 0x6f, 0x75, 0x72, 0x63,
Expand Down
2 changes: 1 addition & 1 deletion gen/hello.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
const uint32_t qjsc_hello_size = 89;

const uint8_t qjsc_hello[89] = {
0x0d, 0x04, 0x0e, 0x63, 0x6f, 0x6e, 0x73, 0x6f,
0x0e, 0x04, 0x0e, 0x63, 0x6f, 0x6e, 0x73, 0x6f,
0x6c, 0x65, 0x06, 0x6c, 0x6f, 0x67, 0x16, 0x48,
0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72,
0x6c, 0x64, 0x22, 0x65, 0x78, 0x61, 0x6d, 0x70,
Expand Down
4 changes: 2 additions & 2 deletions gen/hello_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
const uint32_t qjsc_fib_module_size = 311;

const uint8_t qjsc_fib_module[311] = {
0x0d, 0x03, 0x2c, 0x65, 0x78, 0x61, 0x6d, 0x70,
0x0e, 0x03, 0x2c, 0x65, 0x78, 0x61, 0x6d, 0x70,
0x6c, 0x65, 0x73, 0x2f, 0x66, 0x69, 0x62, 0x5f,
0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x2e, 0x6a,
0x73, 0x06, 0x66, 0x69, 0x62, 0x02, 0x6e, 0x0d,
Expand Down Expand Up @@ -49,7 +49,7 @@ const uint8_t qjsc_fib_module[311] = {
const uint32_t qjsc_hello_module_size = 178;

const uint8_t qjsc_hello_module[178] = {
0x0d, 0x07, 0x30, 0x65, 0x78, 0x61, 0x6d, 0x70,
0x0e, 0x07, 0x30, 0x65, 0x78, 0x61, 0x6d, 0x70,
0x6c, 0x65, 0x73, 0x2f, 0x68, 0x65, 0x6c, 0x6c,
0x6f, 0x5f, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65,
0x2e, 0x6a, 0x73, 0x1e, 0x2e, 0x2f, 0x66, 0x69,
Expand Down
2,052 changes: 1,026 additions & 1,026 deletions gen/repl.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion gen/test_fib.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
const uint32_t qjsc_test_fib_size = 293;

const uint8_t qjsc_test_fib[293] = {
0x0d, 0x0d, 0x28, 0x65, 0x78, 0x61, 0x6d, 0x70,
0x0e, 0x0d, 0x28, 0x65, 0x78, 0x61, 0x6d, 0x70,
0x6c, 0x65, 0x73, 0x2f, 0x74, 0x65, 0x73, 0x74,
0x5f, 0x66, 0x69, 0x62, 0x2e, 0x6a, 0x73, 0x04,
0x6f, 0x73, 0x0a, 0x69, 0x73, 0x57, 0x69, 0x6e,
Expand Down
3 changes: 1 addition & 2 deletions libregexp-opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ DEF(range32, 3) /* variable length */
DEF(lookahead, 5)
DEF(negative_lookahead, 5)
DEF(push_char_pos, 1) /* push the character position on the stack */
DEF(bne_char_pos, 5) /* pop one stack element and jump if equal to the character
position */
DEF(check_advance, 1) /* pop one stack element and check that it is different from the character position */
DEF(prev, 1) /* go to the previous char */
DEF(simple_greedy_quant, 17)

Expand Down
108 changes: 37 additions & 71 deletions libregexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,6 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
case REOP_loop:
case REOP_lookahead:
case REOP_negative_lookahead:
case REOP_bne_char_pos:
val = get_u32(buf + pos + 1);
val += (pos + 5);
printf(" %u", val);
Expand Down Expand Up @@ -976,21 +975,17 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
}

/* Return:
1 if the opcodes in bc_buf[] always advance the character pointer.
0 if the character pointer may not be advanced.
-1 if the code may depend on side effects of its previous execution (backreference)
- true if the opcodes may not advance the char pointer
- false if the opcodes always advance the char pointer
*/
static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
{
int pos, opcode, ret, len, i;
uint32_t val, last;
BOOL has_back_reference;
uint8_t capture_bitmap[CAPTURE_COUNT_MAX];
int pos, opcode, len;
uint32_t val;
BOOL ret;

ret = -2; /* not known yet */
ret = TRUE;
pos = 0;
has_back_reference = FALSE;
memset(capture_bitmap, 0, sizeof(capture_bitmap));

while (pos < bc_buf_len) {
opcode = bc_buf[pos];
Expand All @@ -1010,8 +1005,7 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
case REOP_dot:
case REOP_any:
simple_char:
if (ret == -2)
ret = 1;
ret = FALSE;
break;
case REOP_line_start:
case REOP_line_end:
Expand All @@ -1025,41 +1019,16 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
break;
case REOP_save_start:
case REOP_save_end:
val = bc_buf[pos + 1];
capture_bitmap[val] |= 1;
break;
case REOP_save_reset:
{
val = bc_buf[pos + 1];
last = bc_buf[pos + 2];
while (val < last)
capture_bitmap[val++] |= 1;
}
break;
case REOP_back_reference:
case REOP_backward_back_reference:
val = bc_buf[pos + 1];
capture_bitmap[val] |= 2;
has_back_reference = TRUE;
break;
default:
/* safe behavior: we cannot predict the outcome */
if (ret == -2)
ret = 0;
break;
return TRUE;
}
pos += len;
}
if (has_back_reference) {
/* check if there is a back reference which references a capture
made in the same code */
for(i = 0; i < CAPTURE_COUNT_MAX; i++) {
if (capture_bitmap[i] == 3)
return -1;
}
}
if (ret == -2)
ret = 0;
return ret;
}

Expand Down Expand Up @@ -1638,8 +1607,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
running the atom after the first quant_min times,
then there is no match. We remove this test when we
are sure the atom always advances the position. */
add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start) == 0);
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start);

{
int len, pos;
Expand All @@ -1656,38 +1625,34 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
}
if (quant_max == 0) {
s->byte_code.size = last_atom_start;
} else if (quant_max == 1) {
if (dbuf_insert(&s->byte_code, last_atom_start, 5))
goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
greedy;
put_u32(s->byte_code.buf + last_atom_start + 1, len);
} else if (quant_max == INT32_MAX) {
} else if (quant_max == 1 || quant_max == INT32_MAX) {
BOOL has_goto = (quant_max == INT32_MAX);
if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
greedy;
put_u32(s->byte_code.buf + last_atom_start + 1,
len + 5 + add_zero_advance_check);
len + 5 * has_goto + add_zero_advance_check * 2);
if (add_zero_advance_check) {
/* avoid infinite loop by stopping the
recursion if no advance was made in the
atom (only works if the atom has no
side effect) */
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
re_emit_goto(s, REOP_bne_char_pos, last_atom_start);
} else {
re_emit_goto(s, REOP_goto, last_atom_start);
re_emit_op(s, REOP_check_advance);
}
if (has_goto)
re_emit_goto(s, REOP_goto, last_atom_start);
} else {
if (dbuf_insert(&s->byte_code, last_atom_start, 10))
if (dbuf_insert(&s->byte_code, last_atom_start, 10 + add_zero_advance_check))
goto out_of_memory;
pos = last_atom_start;
s->byte_code.buf[pos++] = REOP_push_i32;
put_u32(s->byte_code.buf + pos, quant_max);
pos += 4;
s->byte_code.buf[pos++] = REOP_split_goto_first + greedy;
put_u32(s->byte_code.buf + pos, len + 5);
put_u32(s->byte_code.buf + pos, len + 5 + add_zero_advance_check * 2);
pos += 4;
if (add_zero_advance_check) {
s->byte_code.buf[pos++] = REOP_push_char_pos;
re_emit_op(s, REOP_check_advance);
Comment on lines +1653 to +1654
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't fully understand why this puts the REOP_check_advance at the end of the buffer while everything else is inserted before the end. Is pos == s->byte_code.size on the second line?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll admit I have no idea, I just ported the fix and ran the tests 😅

}
re_emit_goto(s, REOP_loop, last_atom_start + 5);
re_emit_op(s, REOP_drop);
}
Expand All @@ -1711,22 +1676,25 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (quant_max == INT32_MAX) {
pos = s->byte_code.size;
re_emit_op_u32(s, REOP_split_goto_first + greedy,
len + 5 + add_zero_advance_check);
len + 5 + add_zero_advance_check * 2);
if (add_zero_advance_check)
re_emit_op(s, REOP_push_char_pos);
/* copy the atom */
dbuf_put_self(&s->byte_code, last_atom_start, len);
if (add_zero_advance_check)
re_emit_goto(s, REOP_bne_char_pos, pos);
else
re_emit_goto(s, REOP_goto, pos);
re_emit_op(s, REOP_check_advance);
re_emit_goto(s, REOP_goto, pos);
} else if (quant_max > quant_min) {
re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min);
pos = s->byte_code.size;
re_emit_op_u32(s, REOP_split_goto_first + greedy, len + 5);
re_emit_op_u32(s, REOP_split_goto_first + greedy,
len + 5 + add_zero_advance_check * 2);
if (add_zero_advance_check)
re_emit_op(s, REOP_push_char_pos);
/* copy the atom */
dbuf_put_self(&s->byte_code, last_atom_start, len);

if (add_zero_advance_check)
re_emit_op(s, REOP_check_advance);
re_emit_goto(s, REOP_loop, pos);
re_emit_op(s, REOP_drop);
}
Expand Down Expand Up @@ -1840,7 +1808,7 @@ static int lre_compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
}
break;
case REOP_drop:
case REOP_bne_char_pos:
case REOP_check_advance:
assert(stack_size > 0);
stack_size--;
break;
Expand Down Expand Up @@ -2336,11 +2304,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
case REOP_push_char_pos:
stack[stack_len++] = (uintptr_t)cptr;
break;
case REOP_bne_char_pos:
val = get_u32(pc);
pc += 4;
if (stack[--stack_len] != (uintptr_t)cptr)
pc += (int)val;
case REOP_check_advance:
if (stack[--stack_len] == (uintptr_t)cptr)
goto no_match;
break;
case REOP_word_boundary:
case REOP_not_word_boundary:
Expand Down
2 changes: 1 addition & 1 deletion quickjs.c
Original file line number Diff line number Diff line change
Expand Up @@ -33205,7 +33205,7 @@ typedef enum BCTagEnum {
BC_TAG_SET,
} BCTagEnum;

#define BC_VERSION 13
#define BC_VERSION 14
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The generated bytecode changed, so I bumped the version.


typedef struct BCWriterState {
JSContext *ctx;
Expand Down
4 changes: 0 additions & 4 deletions test262_errors.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ test262/test/built-ins/AsyncFromSyncIteratorPrototype/throw/throw-undefined-retu
test262/test/built-ins/AsyncFromSyncIteratorPrototype/throw/throw-undefined-return-not-object.js:72: strict mode: TypeError: $DONE() not called
test262/test/built-ins/AsyncFromSyncIteratorPrototype/throw/throw-undefined-return-object.js:66: TypeError: $DONE() not called
test262/test/built-ins/AsyncFromSyncIteratorPrototype/throw/throw-undefined-return-object.js:66: strict mode: TypeError: $DONE() not called
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
test262/test/built-ins/RegExp/nullable-quantifier.js:21: Test262Error: The regex is expected to match the whole string Expected SameValue(«a», «ab») to be true
test262/test/built-ins/RegExp/nullable-quantifier.js:21: strict mode: Test262Error: The regex is expected to match the whole string Expected SameValue(«a», «ab») to be true
test262/test/built-ins/RegExp/property-escapes/generated/Alphabetic.js:16: Test262Error: `\p{Alphabetic}` should match U+02EBF0 (`𮯰`)
test262/test/built-ins/RegExp/property-escapes/generated/Alphabetic.js:16: strict mode: Test262Error: `\p{Alphabetic}` should match U+02EBF0 (`𮯰`)
test262/test/built-ins/RegExp/property-escapes/generated/Assigned.js:16: Test262Error: `\p{Assigned}` should match U+002FFC (`⿼`)
Expand Down
8 changes: 8 additions & 0 deletions tests/test_builtin.js
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,14 @@ function test_regexp()
/* test zero length matches */
a = /()*?a/.exec(",");
assert(a, null);
a = /(?:(?=(abc)))a/.exec("abc");
assert(a, ["a", "abc"]);
a = /(?:(?=(abc)))?a/.exec("abc");
assert(a, ["a", undefined]);
a = /(?:(?=(abc))){0,2}a/.exec("abc");
assert(a, ["a", undefined]);
a = /(?:|[\w])+([0-9])/.exec("123a23");
assert(a, ["123a23", "3"]);
}

function test_symbol()
Expand Down
Loading