From bf8f9fc852347c8355a98c12721beabd6c9217d7 Mon Sep 17 00:00:00 2001 From: David Iglesias Teixeira Date: Wed, 12 Mar 2025 17:27:54 -0700 Subject: [PATCH 1/5] [html] Allow ampersands in attribute values. --- pkgs/html/lib/dom.dart | 11 +++++++++-- pkgs/html/test/parser_feature_test.dart | 12 ++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pkgs/html/lib/dom.dart b/pkgs/html/lib/dom.dart index 0c6b38e58..07c2e11b0 100644 --- a/pkgs/html/lib/dom.dart +++ b/pkgs/html/lib/dom.dart @@ -283,8 +283,15 @@ abstract class Node { final tokenizer = HtmlTokenizer(sourceSpan!.text, generateSpans: true, attributeSpans: true); - tokenizer.moveNext(); - final token = tokenizer.current as StartTagToken; + // Find the start token in the tokenized source. This is needed because + // the tokenizer may introduce non-fatal (but unexpected here) + // `ParseErrorToken`s. + Token token; + do { + tokenizer.moveNext(); + token = tokenizer.current; + } while (token.kind != TokenKind.startTag); + token as StartTagToken; if (token.attributeSpans == null) return; // no attributes diff --git a/pkgs/html/test/parser_feature_test.dart b/pkgs/html/test/parser_feature_test.dart index 7156146e0..346b747ad 100644 --- a/pkgs/html/test/parser_feature_test.dart +++ b/pkgs/html/test/parser_feature_test.dart @@ -148,6 +148,18 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. expect(elem.attributeSpans!['extends'], null); }); + test('attribute spans if value contains & (non-fatal ParseErrorTokens)', () { + final expectedUrl = 'foo?key=value&key2=value2'; + final text = ''; + + final doc = parse(text, generateSpans: true); + final elem = doc.querySelector('script')!; + final span = elem.attributeValueSpans!['src']!; + + expect(span.start.offset, text.indexOf('foo')); + expect(span.text, expectedUrl); + }); + test('void element innerHTML', () { var doc = parse('
'); expect(doc.body!.innerHtml, '
'); From 7f339c2815fb2040589e2c367e22af4e43413069 Mon Sep 17 00:00:00 2001 From: David Iglesias Teixeira Date: Thu, 13 Mar 2025 11:03:15 -0700 Subject: [PATCH 2/5] Revert change in dom.dart --- pkgs/html/lib/dom.dart | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pkgs/html/lib/dom.dart b/pkgs/html/lib/dom.dart index 07c2e11b0..0c6b38e58 100644 --- a/pkgs/html/lib/dom.dart +++ b/pkgs/html/lib/dom.dart @@ -283,15 +283,8 @@ abstract class Node { final tokenizer = HtmlTokenizer(sourceSpan!.text, generateSpans: true, attributeSpans: true); - // Find the start token in the tokenized source. This is needed because - // the tokenizer may introduce non-fatal (but unexpected here) - // `ParseErrorToken`s. - Token token; - do { - tokenizer.moveNext(); - token = tokenizer.current; - } while (token.kind != TokenKind.startTag); - token as StartTagToken; + tokenizer.moveNext(); + final token = tokenizer.current as StartTagToken; if (token.attributeSpans == null) return; // no attributes From dae4def45fd37e99f76187948190dd0a05d73c7c Mon Sep 17 00:00:00 2001 From: David Iglesias Teixeira Date: Thu, 13 Mar 2025 11:05:02 -0700 Subject: [PATCH 3/5] Only emit a parse error for ambiguous ampersands if they're found outside an attribute value. Co-Authored-By: sigmundch --- pkgs/html/lib/src/tokenizer.dart | 7 +++++-- pkgs/html/test/data/tokenizer/test4.test | 4 ++++ pkgs/html/test/parser_feature_test.dart | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pkgs/html/lib/src/tokenizer.dart b/pkgs/html/lib/src/tokenizer.dart index 3bed0b0b6..228087c3d 100644 --- a/pkgs/html/lib/src/tokenizer.dart +++ b/pkgs/html/lib/src/tokenizer.dart @@ -313,7 +313,6 @@ class HtmlTokenizer implements Iterator { // Try to find the longest entity the string will match to take care // of ¬i for instance. 
- int entityLen; for (entityLen = charStack.length - 1; entityLen > 1; entityLen--) { final possibleEntityName = charStack.sublist(0, entityLen).join(); @@ -340,7 +339,11 @@ class HtmlTokenizer implements Iterator { output = '$output${slice(charStack, entityLen).join()}'; } } else { - _addToken(ParseErrorToken('expected-named-entity')); + if (!fromAttribute) { + // Only emit this error token when we're consuming this NOT as part of an attribute. + // See: https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state + _addToken(ParseErrorToken('expected-named-entity')); + } stream.unget(charStack.removeLast()); output = '&${charStack.join()}'; } diff --git a/pkgs/html/test/data/tokenizer/test4.test b/pkgs/html/test/data/tokenizer/test4.test index c0f3b2b8c..6c0a77ce1 100644 --- a/pkgs/html/test/data/tokenizer/test4.test +++ b/pkgs/html/test/data/tokenizer/test4.test @@ -28,6 +28,10 @@ "input":"", "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]}, +{"description":"Ambiguous ampersand in attribute value", +"input":"", +"output":[["StartTag", "tag", {"attr": "foo?a=b&c=d"}]]}, + {"description":"Allowed \" after ampersand in attribute value", "input":"", "output":[["StartTag", "z", {"z": "&"}]]}, diff --git a/pkgs/html/test/parser_feature_test.dart b/pkgs/html/test/parser_feature_test.dart index 346b747ad..df9172ee7 100644 --- a/pkgs/html/test/parser_feature_test.dart +++ b/pkgs/html/test/parser_feature_test.dart @@ -148,7 +148,7 @@ On line 4, column 3 of ParseError: Unexpected DOCTYPE. Ignored. 
expect(elem.attributeSpans!['extends'], null); }); - test('attribute spans if value contains & (non-fatal ParseErrorTokens)', () { + test('attribute spans if value contains & (ambiguous ampersand)', () { final expectedUrl = 'foo?key=value&key2=value2'; final text = ''; From 749b9573e0112967d71a3c061b00237821cccb1a Mon Sep 17 00:00:00 2001 From: David Iglesias Teixeira Date: Thu, 13 Mar 2025 11:10:24 -0700 Subject: [PATCH 4/5] Update CHANGELOG and roll version. --- pkgs/html/CHANGELOG.md | 4 ++++ pkgs/html/pubspec.yaml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pkgs/html/CHANGELOG.md b/pkgs/html/CHANGELOG.md index 9a881e9d7..cb38545c5 100644 --- a/pkgs/html/CHANGELOG.md +++ b/pkgs/html/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.15.6 + +- Support "ambiguous ampersand" in attribute values. + ## 0.15.5 - Require Dart `3.2`. diff --git a/pkgs/html/pubspec.yaml b/pkgs/html/pubspec.yaml index 447b98e18..1829e6230 100644 --- a/pkgs/html/pubspec.yaml +++ b/pkgs/html/pubspec.yaml @@ -1,5 +1,5 @@ name: html -version: 0.15.5 +version: 0.15.6 description: APIs for parsing and manipulating HTML content outside the browser. repository: https://github.com/dart-lang/tools/tree/main/pkgs/html issue_tracker: https://github.com/dart-lang/tools/issues?q=is%3Aissue+is%3Aopen+label%3Apackage%3Ahtml From 1d1020fa8d938e5b3160d63f5733a2de04b49433 Mon Sep 17 00:00:00 2001 From: David Iglesias Teixeira Date: Thu, 13 Mar 2025 11:13:47 -0700 Subject: [PATCH 5/5] Roll version less, other parsing fixes seem to be patch increments (like 0.14.0+4) --- pkgs/html/CHANGELOG.md | 2 +- pkgs/html/pubspec.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkgs/html/CHANGELOG.md b/pkgs/html/CHANGELOG.md index cb38545c5..eaa1f8c00 100644 --- a/pkgs/html/CHANGELOG.md +++ b/pkgs/html/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.15.6 +## 0.15.5+1 - Support "ambiguous ampersand" in attribute values. 
diff --git a/pkgs/html/pubspec.yaml b/pkgs/html/pubspec.yaml index 1829e6230..7508588ad 100644 --- a/pkgs/html/pubspec.yaml +++ b/pkgs/html/pubspec.yaml @@ -1,5 +1,5 @@ name: html -version: 0.15.6 +version: 0.15.5+1 description: APIs for parsing and manipulating HTML content outside the browser. repository: https://github.com/dart-lang/tools/tree/main/pkgs/html issue_tracker: https://github.com/dart-lang/tools/issues?q=is%3Aissue+is%3Aopen+label%3Apackage%3Ahtml