From 95ca687c7e3ef25b7277ec29c5b28278c3b3aa17 Mon Sep 17 00:00:00 2001 From: Chris Bobbe Date: Tue, 30 Jan 2024 14:00:38 -0800 Subject: [PATCH 1/8] content [nfc]: Inline a small helper function --- lib/model/content.dart | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index c22970a1aa..abf21e8d29 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -887,7 +887,6 @@ class _ZulipContentParser { final element = node; final localName = element.localName; final classes = element.classes; - List blockNodes() => parseBlockContentList(element.nodes); if (localName == 'br' && classes.isEmpty) { return LineBreakNode(debugHtmlNode: debugHtmlNode); @@ -940,7 +939,8 @@ class _ZulipContentParser { } if (localName == 'blockquote' && classes.isEmpty) { - return QuotationNode(blockNodes(), debugHtmlNode: debugHtmlNode); + return QuotationNode(debugHtmlNode: debugHtmlNode, + parseBlockContentList(element.nodes)); } if (localName == 'div' From 96ee7a9953e2fe541b92b4908f7e13dd5a06ba55 Mon Sep 17 00:00:00 2001 From: Chris Bobbe Date: Wed, 31 Jan 2024 12:06:22 -0800 Subject: [PATCH 2/8] content: Save some work by avoiding `classes.contains` in many cases It turns out that `classes.contains` instantiates a Set every time it's called. That's unfortunate. So, remove some of this work by just checking the class string (`className`) directly, in the case where we expect just one class. When we expect two or more classes, we'll want a check that doesn't enforce a particular order, so perhaps we'll use a regular expression for those, or something, in later work. See #497. Fixes-partly: #497 --- lib/model/content.dart | 43 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index abf21e8d29..3bd06741aa 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -577,22 +577,17 @@ class _ZulipContentParser { final dom.Element katexElement; if (!block) { - assert(element.localName == 'span' - && element.classes.length == 1 - && element.classes.contains('katex')); + assert(element.localName == 'span' && element.className == 'katex'); katexElement = element; } else { - assert(element.localName == 'span' - && element.classes.length == 1 - && element.classes.contains('katex-display')); + assert(element.localName == 'span' && element.className == 'katex-display'); if (element.nodes.length != 1) return null; final child = element.nodes.single; if (child is! dom.Element) return null; if (child.localName != 'span') return null; - if (child.classes.length != 1) return null; - if (!child.classes.contains('katex')) return null; + if (child.className != 'katex') return null; katexElement = child; } @@ -602,8 +597,7 @@ class _ZulipContentParser { final child = katexElement.nodes.first; if (child is! dom.Element) return null; if (child.localName != 'span') return null; - if (child.classes.length != 1) return null; - if (!child.classes.contains('katex-mathml')) return null; + if (child.className != 'katex-mathml') return null; if (child.nodes.length != 1) return null; final grandchild = child.nodes.single; @@ -655,6 +649,7 @@ class _ZulipContentParser { final element = node; final localName = element.localName; final classes = element.classes; + final className = element.className; List nodes() => parseInlineContentList(element.nodes); if (localName == 'br' && classes.isEmpty) { @@ -672,9 +667,7 @@ class _ZulipContentParser { if (localName == 'a' && (classes.isEmpty - || (classes.length == 1 - && (classes.contains('stream-topic') - || classes.contains('stream'))))) { + || (className == 'stream-topic' || className == 'stream'))) { final href = element.attributes['href']; if (href == null) return unimplemented(); final link = LinkNode(nodes: nodes(), url: href, debugHtmlNode: debugHtmlNode); @@ -707,9 +700,7 @@ class _ZulipContentParser { return UnicodeEmojiNode(emojiUnicode: unicode, debugHtmlNode: debugHtmlNode); } - if (localName == 'img' - && classes.contains('emoji') - && classes.length == 1) { + if (localName == 'img' && className == 'emoji') { final alt = element.attributes['alt']; if (alt == null) return unimplemented(); final src = element.attributes['src']; @@ -717,9 +708,7 @@ class _ZulipContentParser { return ImageEmojiNode(src: src, alt: alt, debugHtmlNode: debugHtmlNode); } - if (localName == 'span' - && classes.length == 1 - && classes.contains('katex')) { + if (localName == 'span' && className == 'katex') { final texSource = parseMath(element, block: false); if (texSource == null) return unimplemented(); return MathInlineNode(texSource: texSource, debugHtmlNode: debugHtmlNode); @@ -775,8 +764,7 @@ class _ZulipContentParser { assert(_debugParserContext == _ParserContext.block); final mainElement = () { assert(divElement.localName == 'div' - && divElement.classes.length == 1 - && divElement.classes.contains("codehilite")); + && divElement.className == "codehilite"); if (divElement.nodes.length != 1) return null; final child = divElement.nodes[0]; @@ -848,8 +836,7 @@ class _ZulipContentParser { assert(_debugParserContext == _ParserContext.block); final imgElement = () { assert(divElement.localName == 'div' - && divElement.classes.length == 1 - && divElement.classes.contains('message_inline_image')); + && divElement.className == 'message_inline_image'); if (divElement.nodes.length != 1) return null; final child = divElement.nodes[0]; @@ -886,6 +873,7 @@ class _ZulipContentParser { } final element = node; final localName = element.localName; + final className = element.className; final classes = element.classes; if (localName == 'br' && classes.isEmpty) { @@ -895,8 +883,7 @@ class _ZulipContentParser { if (localName == 'p' && classes.isEmpty) { // Oddly, the way a math block gets encoded in Zulip HTML is inside a

. if (element.nodes case [dom.Element(localName: 'span') && var child, ...]) { - if (child.classes.length == 1 - && child.classes.contains('katex-display')) { + if (child.className == 'katex-display') { if (element.nodes case [_] || [_, dom.Element(localName: 'br'), dom.Text(text: "\n")]) { @@ -943,13 +930,11 @@ class _ZulipContentParser { parseBlockContentList(element.nodes)); } - if (localName == 'div' - && classes.length == 1 && classes.contains('codehilite')) { + if (localName == 'div' && className == 'codehilite') { return parseCodeBlock(element); } - if (localName == 'div' - && classes.length == 1 && classes.contains('message_inline_image')) { + if (localName == 'div' && className == 'message_inline_image') { return parseImageNode(element); } From 432b0570f7b665f2272e699e882f65b20718fff3 Mon Sep 17 00:00:00 2001 From: Greg Price Date: Fri, 2 Feb 2024 09:47:42 -0800 Subject: [PATCH 3/8] content: Save work by using className over `classes` for zero classes, too --- lib/model/content.dart | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index 3bd06741aa..e8e15c6729 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -652,21 +652,21 @@ class _ZulipContentParser { final className = element.className; List nodes() => parseInlineContentList(element.nodes); - if (localName == 'br' && classes.isEmpty) { + if (localName == 'br' && className.isEmpty) { return LineBreakInlineNode(debugHtmlNode: debugHtmlNode); } - if (localName == 'strong' && classes.isEmpty) { + if (localName == 'strong' && className.isEmpty) { return StrongNode(nodes: nodes(), debugHtmlNode: debugHtmlNode); } - if (localName == 'em' && classes.isEmpty) { + if (localName == 'em' && className.isEmpty) { return EmphasisNode(nodes: nodes(), debugHtmlNode: debugHtmlNode); } - if (localName == 'code' && classes.isEmpty) { + if (localName == 'code' && className.isEmpty) { return InlineCodeNode(nodes: nodes(), debugHtmlNode: debugHtmlNode); } if (localName == 'a' - && (classes.isEmpty + && (className.isEmpty || (className == 'stream-topic' || className == 'stream'))) { final href = element.attributes['href']; if (href == null) return unimplemented(); @@ -745,13 +745,13 @@ class _ZulipContentParser { case 'ul': listStyle = ListStyle.unordered; break; } assert(listStyle != null); - assert(element.classes.isEmpty); + assert(element.className.isEmpty); final debugHtmlNode = kDebugMode ? element : null; final List> items = []; for (final item in element.nodes) { if (item is dom.Text && item.text == '\n') continue; - if (item is! dom.Element || item.localName != 'li' || item.classes.isNotEmpty) { + if (item is! dom.Element || item.localName != 'li' || item.className.isNotEmpty) { items.add([UnimplementedBlockContentNode(htmlNode: item)]); } items.add(parseImplicitParagraphBlockContentList(item.nodes)); @@ -842,13 +842,13 @@ class _ZulipContentParser { final child = divElement.nodes[0]; if (child is! dom.Element) return null; if (child.localName != 'a') return null; - if (child.classes.isNotEmpty) return null; + if (child.className.isNotEmpty) return null; if (child.nodes.length != 1) return null; final grandchild = child.nodes[0]; if (grandchild is! dom.Element) return null; if (grandchild.localName != 'img') return null; - if (grandchild.classes.isNotEmpty) return null; + if (grandchild.className.isNotEmpty) return null; return grandchild; }(); @@ -874,13 +874,12 @@ class _ZulipContentParser { final element = node; final localName = element.localName; final className = element.className; - final classes = element.classes; - if (localName == 'br' && classes.isEmpty) { + if (localName == 'br' && className.isEmpty) { return LineBreakNode(debugHtmlNode: debugHtmlNode); } - if (localName == 'p' && classes.isEmpty) { + if (localName == 'p' && className.isEmpty) { // Oddly, the way a math block gets encoded in Zulip HTML is inside a

. if (element.nodes case [dom.Element(localName: 'span') && var child, ...]) { if (child.className == 'katex-display') { @@ -913,7 +912,7 @@ class _ZulipContentParser { case 'h5': headingLevel = HeadingLevel.h5; break; case 'h6': headingLevel = HeadingLevel.h6; break; } - if (headingLevel != null && classes.isEmpty) { + if (headingLevel != null && className.isEmpty) { final parsed = parseBlockInline(element.nodes); return HeadingNode(debugHtmlNode: debugHtmlNode, level: headingLevel, @@ -921,11 +920,11 @@ class _ZulipContentParser { nodes: parsed.nodes); } - if ((localName == 'ol' || localName == 'ul') && classes.isEmpty) { + if ((localName == 'ol' || localName == 'ul') && className.isEmpty) { return parseListNode(element); } - if (localName == 'blockquote' && classes.isEmpty) { + if (localName == 'blockquote' && className.isEmpty) { return QuotationNode(debugHtmlNode: debugHtmlNode, parseBlockContentList(element.nodes)); } From d3da7f5a064612fb240529c02f8bac3d9a44eb63 Mon Sep 17 00:00:00 2001 From: Greg Price Date: Fri, 2 Feb 2024 12:07:10 -0800 Subject: [PATCH 4/8] content [nfc]: Cut dead empty-class case for code-highlighting spans This className string can never be empty, because it's an item from a CssClassSet, and those consist of nonempty strings. (If for example the element's "class" attribute is absent, or empty, or pure whitespace, then the resulting class set still has no empty strings; it'll just be an empty set.) --- lib/model/code_block.dart | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/model/code_block.dart b/lib/model/code_block.dart index 34c7c5506b..222981f6e1 100644 --- a/lib/model/code_block.dart +++ b/lib/model/code_block.dart @@ -174,7 +174,6 @@ enum CodeBlockSpanType { CodeBlockSpanType codeBlockSpanTypeFromClassName(String className) { return switch (className) { - '' => CodeBlockSpanType.text, 'hll' => CodeBlockSpanType.highlightedLines, 'w' => CodeBlockSpanType.whitespace, 'esc' => CodeBlockSpanType.escape, From 36feae2dfab8aec63367ae5ed45d520fbc63521f Mon Sep 17 00:00:00 2001 From: Greg Price Date: Fri, 2 Feb 2024 12:12:45 -0800 Subject: [PATCH 5/8] content: Use className rather than class set, for CodeBlockSpanNode This is NFC except in the case where className looks like " foo ", with leading and/or trailing whitespace. In that case, the previous code would accept it (if the class name in the middle was one we recognized), and this code will reject as unimplemented. For cases where the previous code would reject because `classes.length` was not 1, this code will reject because className won't be one that codeBlockSpanTypeFromClassName recognizes. --- lib/model/content.dart | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index e8e15c6729..e29dc365b3 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -807,9 +807,8 @@ class _ZulipContentParser { } span = CodeBlockSpanNode(text: text, type: CodeBlockSpanType.text); - case dom.Element(localName: 'span', :final text, :final classes) - when classes.length == 1: - final CodeBlockSpanType type = codeBlockSpanTypeFromClassName(classes.first); + case dom.Element(localName: 'span', :final text, :final className): + final CodeBlockSpanType type = codeBlockSpanTypeFromClassName(className); switch (type) { case CodeBlockSpanType.unknown: // TODO(#194): Show these as un-syntax-highlighted code, in production. From d94df90b57af20c89953d3f317a34212b304341a Mon Sep 17 00:00:00 2001 From: Greg Price Date: Fri, 2 Feb 2024 10:01:14 -0800 Subject: [PATCH 6/8] content: Use className regexp for user-mention, instead of class set This is more efficient but should otherwise behave exactly the same, except if the className has extraneous whitespace: the previous code would accept that, and this code will reject as unimplemented. To help verify that we've gotten this change right, add some more tests, so that we cover all the different alternatives in this regexp. --- lib/model/content.dart | 12 ++++++++---- test/model/content_test.dart | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index e29dc365b3..c73931d4a2 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -632,6 +632,13 @@ class _ZulipContentParser { return result; } + static final _userMentionClassNameRegexp = () { + // This matches a class `user-mention` or `user-group-mention`, + // plus an optional class `silent`, appearing in either order. + const mentionClass = r"user(?:-group)?-mention"; + return RegExp("^(?:$mentionClass(?: silent)?|silent $mentionClass)\$"); + }(); + static final _emojiClassRegexp = RegExp(r"^emoji(-[0-9a-f]+)*$"); InlineContentNode parseInlineContent(dom.Node node) { @@ -676,10 +683,7 @@ class _ZulipContentParser { } if (localName == 'span' - && (classes.contains('user-mention') - || classes.contains('user-group-mention')) - && (classes.length == 1 - || (classes.length == 2 && classes.contains('silent')))) { + && _userMentionClassNameRegexp.hasMatch(className)) { // TODO assert UserMentionNode can't contain LinkNode; // either a debug-mode check, or perhaps we can make expectations much // tighter on a UserMentionNode's contents overall. diff --git a/test/model/content_test.dart b/test/model/content_test.dart index 87c6c182e6..5805f5e22c 100644 --- a/test/model/content_test.dart +++ b/test/model/content_test.dart @@ -138,7 +138,27 @@ void main() { '

Greg Price

', const UserMentionNode(nodes: [TextNode('Greg Price')])); - // TODO test group mentions and wildcard mentions + testParseInline('silent user @-mention, class order reversed', + // "@_**Greg Price**" (hypothetical server variation) + '

Greg Price

', + const UserMentionNode(nodes: [TextNode('Greg Price')])); + + testParseInline('plain group @-mention', + // "@*test-empty*" + '

@test-empty

', + const UserMentionNode(nodes: [TextNode('@test-empty')])); + + testParseInline('silent group @-mention', + // "@_*test-empty*" + '

test-empty

', + const UserMentionNode(nodes: [TextNode('test-empty')])); + + testParseInline('silent group @-mention, class order reversed', + // "@_*test-empty*" (hypothetical server variation) + '

test-empty

', + const UserMentionNode(nodes: [TextNode('test-empty')])); + + // TODO test wildcard mentions }); testParseInline('parse Unicode emoji, encoded in span element', From 89ea1cd2c038a610974c29dfb3f54db34a50f4b7 Mon Sep 17 00:00:00 2001 From: Greg Price Date: Fri, 2 Feb 2024 10:01:38 -0800 Subject: [PATCH 7/8] content [nfc]: Use non-capturing group in _emojiClassRegexp We never look at the captured group, so it's more efficient to not ask the regexp engine to capture it in the first place. --- lib/model/content.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index c73931d4a2..804cfb6d1a 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -639,7 +639,7 @@ class _ZulipContentParser { return RegExp("^(?:$mentionClass(?: silent)?|silent $mentionClass)\$"); }(); - static final _emojiClassRegexp = RegExp(r"^emoji(-[0-9a-f]+)*$"); + static final _emojiClassRegexp = RegExp(r"^emoji(?:-[0-9a-f]+)*$"); InlineContentNode parseInlineContent(dom.Node node) { assert(_debugParserContext == _ParserContext.inline); From 0acde8558dc9f0aa500b0f2d20a6589d56b3be5b Mon Sep 17 00:00:00 2001 From: Greg Price Date: Fri, 2 Feb 2024 10:24:26 -0800 Subject: [PATCH 8/8] content: Use className regexp for Unicode emoji, instead of class set And add another test for more complete coverage. This removes our last use of the `classes` class set, so it completes #497. Fixes: #497 --- lib/model/content.dart | 18 ++++++++---------- test/model/content_test.dart | 5 +++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/lib/model/content.dart b/lib/model/content.dart index 804cfb6d1a..0b0bb502bb 100644 --- a/lib/model/content.dart +++ b/lib/model/content.dart @@ -639,7 +639,11 @@ class _ZulipContentParser { return RegExp("^(?:$mentionClass(?: silent)?|silent $mentionClass)\$"); }(); - static final _emojiClassRegexp = RegExp(r"^emoji(?:-[0-9a-f]+)*$"); + static final _emojiClassNameRegexp = () { + const specificEmoji = r"emoji(?:-[0-9a-f]+)+"; + return RegExp("^(?:emoji $specificEmoji|$specificEmoji emoji)\$"); + }(); + static final _emojiCodeFromClassNameRegexp = RegExp(r"emoji-([^ ]+)"); InlineContentNode parseInlineContent(dom.Node node) { assert(_debugParserContext == _ParserContext.inline); @@ -655,7 +659,6 @@ class _ZulipContentParser { final element = node; final localName = element.localName; - final classes = element.classes; final className = element.className; List nodes() => parseInlineContentList(element.nodes); @@ -691,14 +694,9 @@ class _ZulipContentParser { } if (localName == 'span' - && classes.length == 2 - && classes.contains('emoji') - && classes.every(_emojiClassRegexp.hasMatch)) { - final emojiCode = classes - .firstWhere((className) => className.startsWith('emoji-')) - .replaceFirst('emoji-', ''); - assert(emojiCode.isNotEmpty); - + && _emojiClassNameRegexp.hasMatch(className)) { + final emojiCode = _emojiCodeFromClassNameRegexp.firstMatch(className)! + .group(1)!; final unicode = tryParseEmojiCodeToUnicode(emojiCode); if (unicode == null) return unimplemented(); return UnicodeEmojiNode(emojiUnicode: unicode, debugHtmlNode: debugHtmlNode); diff --git a/test/model/content_test.dart b/test/model/content_test.dart index 5805f5e22c..dc97c89b92 100644 --- a/test/model/content_test.dart +++ b/test/model/content_test.dart @@ -166,6 +166,11 @@ void main() { '

:thumbs_up:

', const UnicodeEmojiNode(emojiUnicode: '\u{1f44d}')); // "👍" + testParseInline('parse Unicode emoji, encoded in span element, class order reversed', + // ":thumbs_up:" (hypothetical server variation) + '

:thumbs_up:

', + const UnicodeEmojiNode(emojiUnicode: '\u{1f44d}')); // "👍" + testParseInline('parse Unicode emoji, encoded in span element, multiple codepoints', // ":transgender_flag:" '

:transgender_flag:

',