From 95ca687c7e3ef25b7277ec29c5b28278c3b3aa17 Mon Sep 17 00:00:00 2001
From: Chris Bobbe <cbobbe@zulip.com>
Date: Tue, 30 Jan 2024 14:00:38 -0800
Subject: [PATCH 1/8] content [nfc]: Inline a small helper function

---
 lib/model/content.dart | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/model/content.dart b/lib/model/content.dart
index c22970a1aa..abf21e8d29 100644
--- a/lib/model/content.dart
+++ b/lib/model/content.dart
@@ -887,7 +887,6 @@ class _ZulipContentParser {
     final element = node;
     final localName = element.localName;
     final classes = element.classes;
-    List<BlockContentNode> blockNodes() => parseBlockContentList(element.nodes);
 
     if (localName == 'br' && classes.isEmpty) {
       return LineBreakNode(debugHtmlNode: debugHtmlNode);
@@ -940,7 +939,8 @@ class _ZulipContentParser {
     }
 
     if (localName == 'blockquote' && classes.isEmpty) {
-      return QuotationNode(blockNodes(), debugHtmlNode: debugHtmlNode);
+      return QuotationNode(debugHtmlNode: debugHtmlNode,
+        parseBlockContentList(element.nodes));
     }
 
     if (localName == 'div'

From 96ee7a9953e2fe541b92b4908f7e13dd5a06ba55 Mon Sep 17 00:00:00 2001
From: Chris Bobbe <cbobbe@zulip.com>
Date: Wed, 31 Jan 2024 12:06:22 -0800
Subject: [PATCH 2/8] content: Save some work by avoiding `classes.contains` in
 many cases

It turns out that `classes.contains` instantiates a Set every time
it's called. That's unfortunate.

So, remove some of this work by just checking the class string
(`className`) directly, in the case where we expect just one class.
When we expect two or more classes, we'll want a check that doesn't
enforce a particular order, so perhaps we'll use a regular
expression for those, or something, in later work. See #497.

Fixes-partly: #497
---
 lib/model/content.dart | 43 ++++++++++++++----------------------------
 1 file changed, 14 insertions(+), 29 deletions(-)

diff --git a/lib/model/content.dart b/lib/model/content.dart
index abf21e8d29..3bd06741aa 100644
--- a/lib/model/content.dart
+++ b/lib/model/content.dart
@@ -577,22 +577,17 @@ class _ZulipContentParser {
 
     final dom.Element katexElement;
     if (!block) {
-      assert(element.localName == 'span'
-          && element.classes.length == 1
-          && element.classes.contains('katex'));
+      assert(element.localName == 'span' && element.className == 'katex');
 
       katexElement = element;
     } else {
-      assert(element.localName == 'span'
-          && element.classes.length == 1
-          && element.classes.contains('katex-display'));
+      assert(element.localName == 'span' && element.className == 'katex-display');
 
       if (element.nodes.length != 1) return null;
       final child = element.nodes.single;
       if (child is! dom.Element) return null;
       if (child.localName != 'span') return null;
-      if (child.classes.length != 1) return null;
-      if (!child.classes.contains('katex')) return null;
+      if (child.className != 'katex') return null;
       katexElement = child;
     }
 
@@ -602,8 +597,7 @@ class _ZulipContentParser {
     final child = katexElement.nodes.first;
     if (child is! dom.Element) return null;
     if (child.localName != 'span') return null;
-    if (child.classes.length != 1) return null;
-    if (!child.classes.contains('katex-mathml')) return null;
+    if (child.className != 'katex-mathml') return null;
 
     if (child.nodes.length != 1) return null;
     final grandchild = child.nodes.single;
@@ -655,6 +649,7 @@ class _ZulipContentParser {
     final element = node;
     final localName = element.localName;
     final classes = element.classes;
+    final className = element.className;
     List<InlineContentNode> nodes() => parseInlineContentList(element.nodes);
 
     if (localName == 'br' && classes.isEmpty) {
@@ -672,9 +667,7 @@ class _ZulipContentParser {
 
     if (localName == 'a'
         && (classes.isEmpty
-            || (classes.length == 1
-                && (classes.contains('stream-topic')
-                    || classes.contains('stream'))))) {
+            || (className == 'stream-topic' || className == 'stream'))) {
       final href = element.attributes['href'];
       if (href == null) return unimplemented();
       final link = LinkNode(nodes: nodes(), url: href, debugHtmlNode: debugHtmlNode);
@@ -707,9 +700,7 @@ class _ZulipContentParser {
       return UnicodeEmojiNode(emojiUnicode: unicode, debugHtmlNode: debugHtmlNode);
     }
 
-    if (localName == 'img'
-        && classes.contains('emoji')
-        && classes.length == 1) {
+    if (localName == 'img' && className == 'emoji') {
       final alt = element.attributes['alt'];
       if (alt == null) return unimplemented();
       final src = element.attributes['src'];
@@ -717,9 +708,7 @@ class _ZulipContentParser {
       return ImageEmojiNode(src: src, alt: alt, debugHtmlNode: debugHtmlNode);
     }
 
-    if (localName == 'span'
-        && classes.length == 1
-        && classes.contains('katex')) {
+    if (localName == 'span' && className == 'katex') {
       final texSource = parseMath(element, block: false);
       if (texSource == null) return unimplemented();
       return MathInlineNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
@@ -775,8 +764,7 @@ class _ZulipContentParser {
     assert(_debugParserContext == _ParserContext.block);
     final mainElement = () {
       assert(divElement.localName == 'div'
-          && divElement.classes.length == 1
-          && divElement.classes.contains("codehilite"));
+          && divElement.className == "codehilite");
 
       if (divElement.nodes.length != 1) return null;
       final child = divElement.nodes[0];
@@ -848,8 +836,7 @@ class _ZulipContentParser {
     assert(_debugParserContext == _ParserContext.block);
     final imgElement = () {
       assert(divElement.localName == 'div'
-          && divElement.classes.length == 1
-          && divElement.classes.contains('message_inline_image'));
+          && divElement.className == 'message_inline_image');
 
       if (divElement.nodes.length != 1) return null;
       final child = divElement.nodes[0];
@@ -886,6 +873,7 @@ class _ZulipContentParser {
     }
     final element = node;
     final localName = element.localName;
+    final className = element.className;
     final classes = element.classes;
 
     if (localName == 'br' && classes.isEmpty) {
@@ -895,8 +883,7 @@ class _ZulipContentParser {
     if (localName == 'p' && classes.isEmpty) {
       // Oddly, the way a math block gets encoded in Zulip HTML is inside a <p>.
       if (element.nodes case [dom.Element(localName: 'span') && var child, ...]) {
-        if (child.classes.length == 1
-            && child.classes.contains('katex-display')) {
+        if (child.className == 'katex-display') {
           if (element.nodes case [_]
                               || [_, dom.Element(localName: 'br'),
                                      dom.Text(text: "\n")]) {
@@ -943,13 +930,11 @@ class _ZulipContentParser {
         parseBlockContentList(element.nodes));
     }
 
-    if (localName == 'div'
-        && classes.length == 1 && classes.contains('codehilite')) {
+    if (localName == 'div' && className == 'codehilite') {
       return parseCodeBlock(element);
     }
 
-    if (localName == 'div'
-        && classes.length == 1 && classes.contains('message_inline_image')) {
+    if (localName == 'div' && className == 'message_inline_image') {
       return parseImageNode(element);
     }
 

From 432b0570f7b665f2272e699e882f65b20718fff3 Mon Sep 17 00:00:00 2001
From: Greg Price <greg@zulip.com>
Date: Fri, 2 Feb 2024 09:47:42 -0800
Subject: [PATCH 3/8] content: Save work by using className over `classes` for
 zero classes, too

---
 lib/model/content.dart | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/lib/model/content.dart b/lib/model/content.dart
index 3bd06741aa..e8e15c6729 100644
--- a/lib/model/content.dart
+++ b/lib/model/content.dart
@@ -652,21 +652,21 @@ class _ZulipContentParser {
     final className = element.className;
     List<InlineContentNode> nodes() => parseInlineContentList(element.nodes);
 
-    if (localName == 'br' && classes.isEmpty) {
+    if (localName == 'br' && className.isEmpty) {
       return LineBreakInlineNode(debugHtmlNode: debugHtmlNode);
     }
-    if (localName == 'strong' && classes.isEmpty) {
+    if (localName == 'strong' && className.isEmpty) {
       return StrongNode(nodes: nodes(), debugHtmlNode: debugHtmlNode);
     }
-    if (localName == 'em' && classes.isEmpty) {
+    if (localName == 'em' && className.isEmpty) {
       return EmphasisNode(nodes: nodes(), debugHtmlNode: debugHtmlNode);
     }
-    if (localName == 'code' && classes.isEmpty) {
+    if (localName == 'code' && className.isEmpty) {
       return InlineCodeNode(nodes: nodes(), debugHtmlNode: debugHtmlNode);
     }
 
     if (localName == 'a'
-        && (classes.isEmpty
+        && (className.isEmpty
             || (className == 'stream-topic' || className == 'stream'))) {
       final href = element.attributes['href'];
       if (href == null) return unimplemented();
@@ -745,13 +745,13 @@ class _ZulipContentParser {
       case 'ul': listStyle = ListStyle.unordered; break;
     }
     assert(listStyle != null);
-    assert(element.classes.isEmpty);
+    assert(element.className.isEmpty);
 
     final debugHtmlNode = kDebugMode ? element : null;
     final List<List<BlockContentNode>> items = [];
     for (final item in element.nodes) {
       if (item is dom.Text && item.text == '\n') continue;
-      if (item is! dom.Element || item.localName != 'li' || item.classes.isNotEmpty) {
+      if (item is! dom.Element || item.localName != 'li' || item.className.isNotEmpty) {
         items.add([UnimplementedBlockContentNode(htmlNode: item)]);
       }
       items.add(parseImplicitParagraphBlockContentList(item.nodes));
@@ -842,13 +842,13 @@ class _ZulipContentParser {
       final child = divElement.nodes[0];
       if (child is! dom.Element) return null;
       if (child.localName != 'a') return null;
-      if (child.classes.isNotEmpty) return null;
+      if (child.className.isNotEmpty) return null;
 
       if (child.nodes.length != 1) return null;
       final grandchild = child.nodes[0];
       if (grandchild is! dom.Element) return null;
       if (grandchild.localName != 'img') return null;
-      if (grandchild.classes.isNotEmpty) return null;
+      if (grandchild.className.isNotEmpty) return null;
       return grandchild;
     }();
 
@@ -874,13 +874,12 @@ class _ZulipContentParser {
     final element = node;
     final localName = element.localName;
     final className = element.className;
-    final classes = element.classes;
 
-    if (localName == 'br' && classes.isEmpty) {
+    if (localName == 'br' && className.isEmpty) {
       return LineBreakNode(debugHtmlNode: debugHtmlNode);
     }
 
-    if (localName == 'p' && classes.isEmpty) {
+    if (localName == 'p' && className.isEmpty) {
       // Oddly, the way a math block gets encoded in Zulip HTML is inside a <p>.
       if (element.nodes case [dom.Element(localName: 'span') && var child, ...]) {
         if (child.className == 'katex-display') {
@@ -913,7 +912,7 @@ class _ZulipContentParser {
       case 'h5': headingLevel = HeadingLevel.h5; break;
       case 'h6': headingLevel = HeadingLevel.h6; break;
     }
-    if (headingLevel != null && classes.isEmpty) {
+    if (headingLevel != null && className.isEmpty) {
       final parsed = parseBlockInline(element.nodes);
       return HeadingNode(debugHtmlNode: debugHtmlNode,
         level: headingLevel,
@@ -921,11 +920,11 @@ class _ZulipContentParser {
         nodes: parsed.nodes);
     }
 
-    if ((localName == 'ol' || localName == 'ul') && classes.isEmpty) {
+    if ((localName == 'ol' || localName == 'ul') && className.isEmpty) {
       return parseListNode(element);
     }
 
-    if (localName == 'blockquote' && classes.isEmpty) {
+    if (localName == 'blockquote' && className.isEmpty) {
       return QuotationNode(debugHtmlNode: debugHtmlNode,
         parseBlockContentList(element.nodes));
     }

From d3da7f5a064612fb240529c02f8bac3d9a44eb63 Mon Sep 17 00:00:00 2001
From: Greg Price <greg@zulip.com>
Date: Fri, 2 Feb 2024 12:07:10 -0800
Subject: [PATCH 4/8] content [nfc]: Cut dead empty-class case for
 code-highlighting spans

This className string can never be empty, because it's an item from
a CssClassSet, and those consist of nonempty strings.  (If for
example the element's "class" attribute is absent, or empty, or
pure whitespace, then the resulting class set still has no empty
strings; it'll just be an empty set.)
---
 lib/model/code_block.dart | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/model/code_block.dart b/lib/model/code_block.dart
index 34c7c5506b..222981f6e1 100644
--- a/lib/model/code_block.dart
+++ b/lib/model/code_block.dart
@@ -174,7 +174,6 @@ enum CodeBlockSpanType {
 
 CodeBlockSpanType codeBlockSpanTypeFromClassName(String className) {
   return switch (className) {
-    '' => CodeBlockSpanType.text,
     'hll' => CodeBlockSpanType.highlightedLines,
     'w' => CodeBlockSpanType.whitespace,
     'esc' => CodeBlockSpanType.escape,

From 36feae2dfab8aec63367ae5ed45d520fbc63521f Mon Sep 17 00:00:00 2001
From: Greg Price <greg@zulip.com>
Date: Fri, 2 Feb 2024 12:12:45 -0800
Subject: [PATCH 5/8] content: Use className rather than class set, for
 CodeBlockSpanNode

This is NFC except in the case where className looks like "  foo  ",
with leading and/or trailing whitespace.  In that case, the previous
code would accept it (if the class name in the middle was one we
recognized), and this code will reject as unimplemented.

For cases where the previous code would reject because `classes.length`
was not 1, this code will reject because className won't be one that
codeBlockSpanTypeFromClassName recognizes.
---
 lib/model/content.dart | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/model/content.dart b/lib/model/content.dart
index e8e15c6729..e29dc365b3 100644
--- a/lib/model/content.dart
+++ b/lib/model/content.dart
@@ -807,9 +807,8 @@ class _ZulipContentParser {
           }
           span = CodeBlockSpanNode(text: text, type: CodeBlockSpanType.text);
 
-        case dom.Element(localName: 'span', :final text, :final classes)
-            when classes.length == 1:
-          final CodeBlockSpanType type = codeBlockSpanTypeFromClassName(classes.first);
+        case dom.Element(localName: 'span', :final text, :final className):
+          final CodeBlockSpanType type = codeBlockSpanTypeFromClassName(className);
           switch (type) {
             case CodeBlockSpanType.unknown:
               // TODO(#194): Show these as un-syntax-highlighted code, in production.

From d94df90b57af20c89953d3f317a34212b304341a Mon Sep 17 00:00:00 2001
From: Greg Price <greg@zulip.com>
Date: Fri, 2 Feb 2024 10:01:14 -0800
Subject: [PATCH 6/8] content: Use className regexp for user-mention, instead
 of class set

This is more efficient but should otherwise behave exactly the same,
except if the className has extraneous whitespace: the previous code
would accept that, and this code will reject as unimplemented.

To help verify that we've gotten this change right, add some more
tests, so that we cover all the different alternatives in this regexp.
---
 lib/model/content.dart       | 12 ++++++++----
 test/model/content_test.dart | 22 +++++++++++++++++++++-
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/lib/model/content.dart b/lib/model/content.dart
index e29dc365b3..c73931d4a2 100644
--- a/lib/model/content.dart
+++ b/lib/model/content.dart
@@ -632,6 +632,13 @@ class _ZulipContentParser {
     return result;
   }
 
+  static final _userMentionClassNameRegexp = () {
+    // This matches a class `user-mention` or `user-group-mention`,
+    // plus an optional class `silent`, appearing in either order.
+    const mentionClass = r"user(?:-group)?-mention";
+    return RegExp("^(?:$mentionClass(?: silent)?|silent $mentionClass)\$");
+  }();
+
   static final _emojiClassRegexp = RegExp(r"^emoji(-[0-9a-f]+)*$");
 
   InlineContentNode parseInlineContent(dom.Node node) {
@@ -676,10 +683,7 @@ class _ZulipContentParser {
     }
 
     if (localName == 'span'
-        && (classes.contains('user-mention')
-            || classes.contains('user-group-mention'))
-        && (classes.length == 1
-            || (classes.length == 2 && classes.contains('silent')))) {
+        && _userMentionClassNameRegexp.hasMatch(className)) {
       // TODO assert UserMentionNode can't contain LinkNode;
       //   either a debug-mode check, or perhaps we can make expectations much
       //   tighter on a UserMentionNode's contents overall.
diff --git a/test/model/content_test.dart b/test/model/content_test.dart
index 87c6c182e6..5805f5e22c 100644
--- a/test/model/content_test.dart
+++ b/test/model/content_test.dart
@@ -138,7 +138,27 @@ void main() {
       '<p><span class="user-mention silent" data-user-id="2187">Greg Price</span></p>',
       const UserMentionNode(nodes: [TextNode('Greg Price')]));
 
-    // TODO test group mentions and wildcard mentions
+    testParseInline('silent user @-mention, class order reversed',
+      // "@_**Greg Price**" (hypothetical server variation)
+      '<p><span class="silent user-mention" data-user-id="2187">Greg Price</span></p>',
+      const UserMentionNode(nodes: [TextNode('Greg Price')]));
+
+    testParseInline('plain group @-mention',
+      // "@*test-empty*"
+      '<p><span class="user-group-mention" data-user-group-id="186">@test-empty</span></p>',
+      const UserMentionNode(nodes: [TextNode('@test-empty')]));
+
+    testParseInline('silent group @-mention',
+      // "@_*test-empty*"
+      '<p><span class="user-group-mention silent" data-user-group-id="186">test-empty</span></p>',
+      const UserMentionNode(nodes: [TextNode('test-empty')]));
+
+    testParseInline('silent group @-mention, class order reversed',
+      // "@_*test-empty*" (hypothetical server variation)
+      '<p><span class="silent user-group-mention" data-user-group-id="186">test-empty</span></p>',
+      const UserMentionNode(nodes: [TextNode('test-empty')]));
+
+    // TODO test wildcard mentions
   });
 
   testParseInline('parse Unicode emoji, encoded in span element',

From 89ea1cd2c038a610974c29dfb3f54db34a50f4b7 Mon Sep 17 00:00:00 2001
From: Greg Price <greg@zulip.com>
Date: Fri, 2 Feb 2024 10:01:38 -0800
Subject: [PATCH 7/8] content [nfc]: Use non-capturing group in
 _emojiClassRegexp

We never look at the captured group, so it's more efficient to
not ask the regexp engine to capture it in the first place.
---
 lib/model/content.dart | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/model/content.dart b/lib/model/content.dart
index c73931d4a2..804cfb6d1a 100644
--- a/lib/model/content.dart
+++ b/lib/model/content.dart
@@ -639,7 +639,7 @@ class _ZulipContentParser {
     return RegExp("^(?:$mentionClass(?: silent)?|silent $mentionClass)\$");
   }();
 
-  static final _emojiClassRegexp = RegExp(r"^emoji(-[0-9a-f]+)*$");
+  static final _emojiClassRegexp = RegExp(r"^emoji(?:-[0-9a-f]+)*$");
 
   InlineContentNode parseInlineContent(dom.Node node) {
     assert(_debugParserContext == _ParserContext.inline);

From 0acde8558dc9f0aa500b0f2d20a6589d56b3be5b Mon Sep 17 00:00:00 2001
From: Greg Price <greg@zulip.com>
Date: Fri, 2 Feb 2024 10:24:26 -0800
Subject: [PATCH 8/8] content: Use className regexp for Unicode emoji, instead
 of class set

And add another test for more complete coverage.

This removes our last use of the `classes` class set,
so it completes #497.

Fixes: #497
---
 lib/model/content.dart       | 18 ++++++++----------
 test/model/content_test.dart |  5 +++++
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/lib/model/content.dart b/lib/model/content.dart
index 804cfb6d1a..0b0bb502bb 100644
--- a/lib/model/content.dart
+++ b/lib/model/content.dart
@@ -639,7 +639,11 @@ class _ZulipContentParser {
     return RegExp("^(?:$mentionClass(?: silent)?|silent $mentionClass)\$");
   }();
 
-  static final _emojiClassRegexp = RegExp(r"^emoji(?:-[0-9a-f]+)*$");
+  static final _emojiClassNameRegexp = () {
+    const specificEmoji = r"emoji(?:-[0-9a-f]+)+";
+    return RegExp("^(?:emoji $specificEmoji|$specificEmoji emoji)\$");
+  }();
+  static final _emojiCodeFromClassNameRegexp = RegExp(r"emoji-([^ ]+)");
 
   InlineContentNode parseInlineContent(dom.Node node) {
     assert(_debugParserContext == _ParserContext.inline);
@@ -655,7 +659,6 @@ class _ZulipContentParser {
 
     final element = node;
     final localName = element.localName;
-    final classes = element.classes;
     final className = element.className;
     List<InlineContentNode> nodes() => parseInlineContentList(element.nodes);
 
@@ -691,14 +694,9 @@ class _ZulipContentParser {
     }
 
     if (localName == 'span'
-        && classes.length == 2
-        && classes.contains('emoji')
-        && classes.every(_emojiClassRegexp.hasMatch)) {
-      final emojiCode = classes
-        .firstWhere((className) => className.startsWith('emoji-'))
-        .replaceFirst('emoji-', '');
-      assert(emojiCode.isNotEmpty);
-
+        && _emojiClassNameRegexp.hasMatch(className)) {
+      final emojiCode = _emojiCodeFromClassNameRegexp.firstMatch(className)!
+        .group(1)!;
       final unicode = tryParseEmojiCodeToUnicode(emojiCode);
       if (unicode == null) return unimplemented();
       return UnicodeEmojiNode(emojiUnicode: unicode, debugHtmlNode: debugHtmlNode);
diff --git a/test/model/content_test.dart b/test/model/content_test.dart
index 5805f5e22c..dc97c89b92 100644
--- a/test/model/content_test.dart
+++ b/test/model/content_test.dart
@@ -166,6 +166,11 @@ void main() {
     '<p><span aria-label="thumbs up" class="emoji emoji-1f44d" role="img" title="thumbs up">:thumbs_up:</span></p>',
     const UnicodeEmojiNode(emojiUnicode: '\u{1f44d}')); // "👍"
 
+  testParseInline('parse Unicode emoji, encoded in span element, class order reversed',
+    // ":thumbs_up:" (hypothetical server variation)
+    '<p><span aria-label="thumbs up" class="emoji-1f44d emoji" role="img" title="thumbs up">:thumbs_up:</span></p>',
+    const UnicodeEmojiNode(emojiUnicode: '\u{1f44d}')); // "👍"
+
   testParseInline('parse Unicode emoji, encoded in span element, multiple codepoints',
     // ":transgender_flag:"
     '<p><span aria-label="transgender flag" class="emoji emoji-1f3f3-fe0f-200d-26a7-fe0f" role="img" title="transgender flag">:transgender_flag:</span></p>',