From 1da460399e13f93e37954ef591168c6795ca7599 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 16 Jan 2025 21:00:48 +0100 Subject: [PATCH 1/4] Fix GH-17486: Incorrect error line numbers reported in Dom\HTMLDocument::createFromString --- ext/dom/html_document.c | 5 +++ ext/dom/tests/modern/html/parser/gh17486.phpt | 37 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 ext/dom/tests/modern/html/parser/gh17486.phpt diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c index ed7454dd89d43..e47345f311b67 100644 --- a/ext/dom/html_document.c +++ b/ext/dom/html_document.c @@ -880,6 +880,11 @@ PHP_METHOD(Dom_HTMLDocument, createFromString) if (!result) { goto fail_oom; } + + /* In the string case we have a single buffer that acts as a sliding window. + * The `current_input_characters` field starts pointing at the start of the buffer, but needs to slide along the + * sliding window as well. */ + application_data.current_input_characters += chunk_size; } if (!dom_parse_decode_encode_finish(&ctx, document, parser, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset)) { diff --git a/ext/dom/tests/modern/html/parser/gh17486.phpt b/ext/dom/tests/modern/html/parser/gh17486.phpt new file mode 100644 index 0000000000000..003363a34cc8c --- /dev/null +++ b/ext/dom/tests/modern/html/parser/gh17486.phpt @@ -0,0 +1,37 @@ +--TEST-- +GH-17486 (Incorrect error line numbers reported in Dom\HTMLDocument::createFromString) +--EXTENSIONS-- +dom +--CREDITS-- +xPaw +--FILE-- + + + + + + +
+ + +HTML; + +\Dom\HTMLDocument::createFromString($html); + +file_put_contents(__DIR__ . '/gh17486.tmp', $html); +\Dom\HTMLDocument::createFromFile(__DIR__ . '/gh17486.tmp'); + +?> +--CLEAN-- + +--EXPECTF-- +Warning: Dom\HTMLDocument::createFromString(): tokenizer error control-character-reference in Entity, line: 7, column: 9 in %s on line %d + +Warning: Dom\HTMLDocument::createFromFile(): tokenizer error control-character-reference in %sgh17486.tmp, line: 7, column: 9 in %s on line %d From ad426e809eb680f0d4216adfc3b7f878826aa8b1 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 16 Jan 2025 21:19:00 +0100 Subject: [PATCH 2/4] Set error_reporting --- ext/dom/tests/modern/html/parser/gh17486.phpt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/dom/tests/modern/html/parser/gh17486.phpt b/ext/dom/tests/modern/html/parser/gh17486.phpt index 003363a34cc8c..051e0dcba2b94 100644 --- a/ext/dom/tests/modern/html/parser/gh17486.phpt +++ b/ext/dom/tests/modern/html/parser/gh17486.phpt @@ -2,6 +2,8 @@ GH-17486 (Incorrect error line numbers reported in Dom\HTMLDocument::createFromString) --EXTENSIONS-- dom +--INI-- +error_reporting=E_ALL --CREDITS-- xPaw --FILE-- From daaa2e64c07b297bcb12e86c671d3acb5d182aad Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 16 Jan 2025 21:41:22 +0100 Subject: [PATCH 3/4] make warning work --- ext/dom/tests/modern/html/parser/gh17486.phpt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/dom/tests/modern/html/parser/gh17486.phpt b/ext/dom/tests/modern/html/parser/gh17486.phpt index 051e0dcba2b94..4d7ddee7dde96 100644 --- a/ext/dom/tests/modern/html/parser/gh17486.phpt +++ b/ext/dom/tests/modern/html/parser/gh17486.phpt @@ -18,7 +18,7 @@ $html = << -
+
HTML; @@ -34,6 +34,6 @@ file_put_contents(__DIR__ . '/gh17486.tmp', $html); @unlink(__DIR__ . '/gh17486.tmp'); ?> --EXPECTF-- -Warning: Dom\HTMLDocument::createFromString(): tokenizer error control-character-reference in Entity, line: 7, column: 9 in %s on line %d +Warning: Dom\HTMLDocument::createFromString(): tokenizer error null-character-reference in Entity, line: 7, column: 9 in %s on line %d -Warning: Dom\HTMLDocument::createFromFile(): tokenizer error control-character-reference in %sgh17486.tmp, line: 7, column: 9 in %s on line %d +Warning: Dom\HTMLDocument::createFromFile(): tokenizer error null-character-reference in %s line: 7, column: 9 in %s on line %d From 0b94f6d2f1bcfe2a85d8af0049670137405819bf Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 16 Jan 2025 22:21:07 +0100 Subject: [PATCH 4/4] fix ub --- ext/dom/html_document.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c index e47345f311b67..240fa71a0cca7 100644 --- a/ext/dom/html_document.c +++ b/ext/dom/html_document.c @@ -884,7 +884,9 @@ PHP_METHOD(Dom_HTMLDocument, createFromString) /* In the string case we have a single buffer that acts as a sliding window. * The `current_input_characters` field starts pointing at the start of the buffer, but needs to slide along the * sliding window as well. */ - application_data.current_input_characters += chunk_size; + if (application_data.current_input_characters) { + application_data.current_input_characters += chunk_size; + } } if (!dom_parse_decode_encode_finish(&ctx, document, parser, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset)) {