From 9292d3c267375dd464ae981bdeb2227b07134ae6 Mon Sep 17 00:00:00 2001 From: Lucian Smith Date: Wed, 30 Apr 2025 14:56:25 -0700 Subject: [PATCH 1/6] Improve the emphasis regex. I was getting failures where URLs with underscores were getting blocks thrown in them, creating invalid HTML. --- include/maddy/emphasizedparser.h | 4 +- tests/maddy/test_maddy_emphasizedparser.cpp | 109 ++++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/include/maddy/emphasizedparser.h b/include/maddy/emphasizedparser.h index 1705e6f..af30928 100644 --- a/include/maddy/emphasizedparser.h +++ b/include/maddy/emphasizedparser.h @@ -40,8 +40,10 @@ class EmphasizedParser : public LineParser */ void Parse(std::string& line) override { + // Modified from previous version, with help from + // https://stackoverflow.com/questions/61346949/regex-for-markdown-emphasis static std::regex re( - R"((?!.*`.*|.*.*)_(?!.*`.*|.*<\/code>.*)([^_]*)_(?!.*`.*|.*<\/code>.*))" + R"((?!.*`.*|.*.*)\b_(?![\s])(?!.*`.*|.*<\/code>.*)(.*?[^\s])_\b(?!.*`.*|.*<\/code>.*))" ); static std::string replacement = "$1"; diff --git a/tests/maddy/test_maddy_emphasizedparser.cpp b/tests/maddy/test_maddy_emphasizedparser.cpp index 6442779..8da0b4f 100644 --- a/tests/maddy/test_maddy_emphasizedparser.cpp +++ b/tests/maddy/test_maddy_emphasizedparser.cpp @@ -21,6 +21,85 @@ TEST(MADDY_EMPHASIZEDPARSER, ItReplacesMarkdownWithEmphasizedHTML) ASSERT_EQ(expected, text); } +TEST(MADDY_EMPHASIZEDPARSER, ItReplacesUnderscoresAtStringEdges) +{ + std::string text = "_some text_"; + std::string expected = "some text"; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotReplaceMarkdownWithInlineUnderscores) +{ + std::string text = "some text_bla_text testing _it_ out"; + std::string expected = "some text_bla_text testing it out"; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + 
ASSERT_EQ(expected, text); +} + +TEST(MADDY_EMPHASIZEDPARSER, ItOnlyReplacesUnderscoresAtWordBreaks) +{ + std::string text = "some _text_bla_ testing _it_ out"; + std::string expected = "some text_bla testing it out"; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_EMPHASIZEDPARSER, ItReplacesUnderscoresWithMultipleWords) +{ + std::string text = "some _text testing it_ out"; + std::string expected = "some text testing it out"; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_EMPHASIZEDPARSER, ItAllowsDoubleUnderscores) +{ + // I'm not sure if this is standard or not, but this is how the github markdown + // parser behaves. Other things I've seen want it to *not* match. + std::string text = "some __text testing it_ out"; + std::string expected = "some _text testing it out"; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_EMPHASIZEDPARSER, ItDoesntReplaceUnderscoresInsideCodeBlocks) +{ + std::string text = "Stuff inside blocks _shouldn't be emphasized_ at all"; + std::string expected = "Stuff inside blocks _shouldn't be emphasized_ at all"; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotReplaceUnderscoresInURLs) +{ + std::string text = "[Link Title](http://example.com/what_you_didn't_know)"; + std::string expected = "[Link Title](http://example.com/what_you_didn't_know)"; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + ASSERT_EQ(expected, text); +} + TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotParseInsideInlineCode) { std::string text = "some text `*bla*` `/**text*/` testing _it_ out"; @@ -32,3 +111,33 @@ TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotParseInsideInlineCode) ASSERT_EQ(expected, text); 
} + +TEST(MADDY_EMPHASIZEDPARSER, ItParsesOutsideCodeBlocks) +{ + std::string text = + "Stuff inside blocks _shouldn't be emphasized_ " + " but outside _should_."; + std::string expected = + "Stuff inside blocks _shouldn't be emphasized_ " + " but outside should."; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_EMPHASIZEDPARSER, ItParsesOutsideTickBlocks) +{ + std::string text = + "Stuff inside `blocks _shouldn't be emphasized_ `" + " but outside _should_."; + std::string expected = + "Stuff inside `blocks _shouldn't be emphasized_ `" + " but outside should."; + auto emphasizedParser = std::make_shared(); + + emphasizedParser->Parse(text); + + ASSERT_EQ(expected, text); +} From 2df2fe7fa6e6a68e199d1c0d3c8c993d94b218eb Mon Sep 17 00:00:00 2001 From: Lucian Smith Date: Wed, 30 Apr 2025 16:23:19 -0700 Subject: [PATCH 2/6] Add strong parser tests. Most don't pass (and are disabled), but should. Unfortunately, adding a word boundary (\b) to the strong regex works for these tests, but somehow the full parser then breaks. The regex that I believed should work is added as a comment, for anyone wishing to make things work going forward. --- include/maddy/strongparser.h | 23 ++++- tests/maddy/test_maddy_strongparser.cpp | 113 ++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 2 deletions(-) diff --git a/include/maddy/strongparser.h b/include/maddy/strongparser.h index 348f2d4..56df620 100644 --- a/include/maddy/strongparser.h +++ b/include/maddy/strongparser.h @@ -40,12 +40,31 @@ class StrongParser : public LineParser */ void Parse(std::string& line) override { + // This version of the regex is changed exactly the same way + // that the regex for the emphasized parser was changed, and + // it then passes all the 'disabled' tests in the 'strong parser' + // test, but then it fails general parsing. 
For some reason, + // "__text__" translates "text" even though there + // are no word boundaries at the correct places. It's weird! + // + //static std::vector res{ + // std::regex{ + // R"((?!.*`.*|.*.*)\b\*\*(?![\s])(?!.*`.*|.*<\/code>.*)" + // "(.*?[^\s])\*\*\b(?!.*`.*|.*<\/code>.*))" + // }, + // std::regex{ + // R"((?!.*`.*|.*.*)\b__(?![\s])(?!.*`.*|.*<\/code>.*)" + // "(.*?[^\s])__\b(?!.*`.*|.*<\/code>.*))" + // } + //}; static std::vector res{ std::regex{ - R"((?!.*`.*|.*.*)\*\*(?!.*`.*|.*<\/code>.*)([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))" + R"((?!.*`.*|.*.*)\*\*(?!.*`.*|.*<\/code>.*)" + "([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))" }, std::regex{ - R"((?!.*`.*|.*.*)__(?!.*`.*|.*<\/code>.*)([^__]*)__(?!.*`.*|.*<\/code>.*))" + R"((?!.*`.*|.*.*)__(?!.*`.*|.*<\/code>.*)" + "([^__]*)__(?!.*`.*|.*<\/code>.*))" } }; static std::string replacement = "$1"; diff --git a/tests/maddy/test_maddy_strongparser.cpp b/tests/maddy/test_maddy_strongparser.cpp index f006e26..5fd962f 100644 --- a/tests/maddy/test_maddy_strongparser.cpp +++ b/tests/maddy/test_maddy_strongparser.cpp @@ -83,3 +83,116 @@ TEST(MADDY_STRONGPARSER, ItDoesNotParseInsideInlineCode) ASSERT_EQ(test.expected, test.text); } } + +TEST(MADDY_STRONGPARSER, ItReplacesUnderscoresAtStringEdges) +{ + std::string text = "__some text__"; + std::string expected = "some text"; + auto strongParser = std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(DISABLED_MADDY_STRONGPARSER, ItDoesNotReplaceMarkdownWithInlineUnderscores) +{ + std::string text = "some text__bla__text testing __it__ out"; + std::string expected = "some text__bla__text testing it out"; + auto strongParser = std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(DISABLED_MADDY_STRONGPARSER, ItOnlyReplacesUnderscoresAtWordBreaks) +{ + std::string text = "some __text__bla__ testing __it__ out"; + std::string expected = + "some text__bla testing it out"; + auto strongParser = 
std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_STRONGPARSER, ItReplacesUnderscoresWithMultipleWords) +{ + std::string text = "some __text testing it__ out"; + std::string expected = "some text testing it out"; + auto strongParser = std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(DISABLED_MADDY_STRONGPARSER, ItAllowsTripleUnderscores) +{ + // I'm not sure if this is standard or not, but this is how the github + // markdown parser behaves. Other things I've seen want it to *not* match. + std::string text = "some ___text testing it__ out"; + std::string expected = "some _text testing it out"; + auto strongParser = std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_STRONGPARSER, ItDoesntReplaceUnderscoresInsideCodeBlocks) +{ + std::string text = + "Stuff inside blocks __shouldn't be strong__ at all"; + std::string expected = + "Stuff inside blocks __shouldn't be strong__ at all"; + auto strongParser = std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(DISABLED_MADDY_STRONGPARSER, ItDoesNotReplaceUnderscoresInURLs) +{ + std::string text = "[Link Title](http://example.com/what__you__didn't__know)"; + std::string expected = + "[Link Title](http://example.com/what__you__didn't__know)"; + auto strongParser = std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_STRONGPARSER, ItParsesOutsideCodeBlocks) +{ + std::string text = + "Stuff inside blocks __shouldn't be strong__ " + " but outside __should__."; + std::string expected = + "Stuff inside blocks __shouldn't be strong__ " + " but outside should."; + auto strongParser = std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} + +TEST(MADDY_STRONGPARSER, ItParsesOutsideTickBlocks) +{ + std::string text = + "Stuff inside `blocks __shouldn't be strong__ `" + " but 
outside __should__."; + std::string expected = + "Stuff inside `blocks __shouldn't be strong__ `" + " but outside should."; + auto strongParser = std::make_shared(); + + strongParser->Parse(text); + + ASSERT_EQ(expected, text); +} From c89a98386501946ebabbc62e3a2911c4c101950e Mon Sep 17 00:00:00 2001 From: Lucian Smith Date: Wed, 30 Apr 2025 16:30:39 -0700 Subject: [PATCH 3/6] Clang fixes; changelog. --- CHANGELOG.md | 1 + include/maddy/strongparser.h | 28 ++++++++++++------------- tests/maddy/test_maddy_strongparser.cpp | 4 +++- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63c0fca..25072a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ maddy uses [semver versioning](https://semver.org/). ## Upcoming +* ![**FIXED**](https://img.shields.io/badge/-FIXED-%23090) Do not create emphasis tags not at word boundaries, i.e. `only_internal_underscores`. * ... ## version 1.5.0 2025-04-21 diff --git a/include/maddy/strongparser.h b/include/maddy/strongparser.h index 56df620..5cf9c38 100644 --- a/include/maddy/strongparser.h +++ b/include/maddy/strongparser.h @@ -46,24 +46,22 @@ class StrongParser : public LineParser // test, but then it fails general parsing. For some reason, // "__text__" translates "text" even though there // are no word boundaries at the correct places. It's weird! 
- // - //static std::vector res{ - // std::regex{ - // R"((?!.*`.*|.*.*)\b\*\*(?![\s])(?!.*`.*|.*<\/code>.*)" - // "(.*?[^\s])\*\*\b(?!.*`.*|.*<\/code>.*))" - // }, - // std::regex{ - // R"((?!.*`.*|.*.*)\b__(?![\s])(?!.*`.*|.*<\/code>.*)" - // "(.*?[^\s])__\b(?!.*`.*|.*<\/code>.*))" - // } - //}; + + // static std::vector res{ + // std::regex{ + // R"((?!.*`.*|.*.*)\b\*\*(?![\s])(?!.*`.*|.*<\/code>.*)" + // "(.*?[^\s])\*\*\b(?!.*`.*|.*<\/code>.*))" + // }, + // std::regex{ + // R"((?!.*`.*|.*.*)\b__(?![\s])(?!.*`.*|.*<\/code>.*)" + // "(.*?[^\s])__\b(?!.*`.*|.*<\/code>.*))" + // } + // }; static std::vector res{ - std::regex{ - R"((?!.*`.*|.*.*)\*\*(?!.*`.*|.*<\/code>.*)" + std::regex{R"((?!.*`.*|.*.*)\*\*(?!.*`.*|.*<\/code>.*)" "([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))" }, - std::regex{ - R"((?!.*`.*|.*.*)__(?!.*`.*|.*<\/code>.*)" + std::regex{R"((?!.*`.*|.*.*)__(?!.*`.*|.*<\/code>.*)" "([^__]*)__(?!.*`.*|.*<\/code>.*))" } }; diff --git a/tests/maddy/test_maddy_strongparser.cpp b/tests/maddy/test_maddy_strongparser.cpp index 5fd962f..466068d 100644 --- a/tests/maddy/test_maddy_strongparser.cpp +++ b/tests/maddy/test_maddy_strongparser.cpp @@ -132,7 +132,9 @@ TEST(MADDY_STRONGPARSER, ItReplacesUnderscoresWithMultipleWords) TEST(DISABLED_MADDY_STRONGPARSER, ItAllowsTripleUnderscores) { // I'm not sure if this is standard or not, but this is how the github - // markdown parser behaves. Other things I've seen want it to *not* match. + // markdown parser behaves. Other things I've seen want it to *not* + // match. + std::string text = "some ___text testing it__ out"; std::string expected = "some _text testing it out"; auto strongParser = std::make_shared(); From 1a12c2c3ea218b3a7d4e4b7e7e86b1e64f0d9576 Mon Sep 17 00:00:00 2001 From: Lucian Smith Date: Wed, 30 Apr 2025 16:34:08 -0700 Subject: [PATCH 4/6] More clang fixes. 
--- include/maddy/strongparser.h | 6 ++---- tests/maddy/test_maddy_emphasizedparser.cpp | 14 +++++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/maddy/strongparser.h b/include/maddy/strongparser.h index 5cf9c38..8a66d6e 100644 --- a/include/maddy/strongparser.h +++ b/include/maddy/strongparser.h @@ -59,11 +59,9 @@ class StrongParser : public LineParser // }; static std::vector res{ std::regex{R"((?!.*`.*|.*.*)\*\*(?!.*`.*|.*<\/code>.*)" - "([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))" - }, + "([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))"}, std::regex{R"((?!.*`.*|.*.*)__(?!.*`.*|.*<\/code>.*)" - "([^__]*)__(?!.*`.*|.*<\/code>.*))" - } + "([^__]*)__(?!.*`.*|.*<\/code>.*))"} }; static std::string replacement = "$1"; for (const auto& re : res) diff --git a/tests/maddy/test_maddy_emphasizedparser.cpp b/tests/maddy/test_maddy_emphasizedparser.cpp index 8da0b4f..a70c248 100644 --- a/tests/maddy/test_maddy_emphasizedparser.cpp +++ b/tests/maddy/test_maddy_emphasizedparser.cpp @@ -67,8 +67,9 @@ TEST(MADDY_EMPHASIZEDPARSER, ItReplacesUnderscoresWithMultipleWords) TEST(MADDY_EMPHASIZEDPARSER, ItAllowsDoubleUnderscores) { - // I'm not sure if this is standard or not, but this is how the github markdown - // parser behaves. Other things I've seen want it to *not* match. + // I'm not sure if this is standard or not, but this is how the github + // markdown parser behaves. Other things I've seen want it to *not* + // match. 
std::string text = "some __text testing it_ out"; std::string expected = "some _text testing it out"; auto emphasizedParser = std::make_shared(); @@ -80,8 +81,10 @@ TEST(MADDY_EMPHASIZEDPARSER, ItAllowsDoubleUnderscores) TEST(MADDY_EMPHASIZEDPARSER, ItDoesntReplaceUnderscoresInsideCodeBlocks) { - std::string text = "Stuff inside blocks _shouldn't be emphasized_ at all"; - std::string expected = "Stuff inside blocks _shouldn't be emphasized_ at all"; + std::string text = + "Stuff inside blocks _shouldn't be emphasized_ at all"; + std::string expected = + "Stuff inside blocks _shouldn't be emphasized_ at all"; auto emphasizedParser = std::make_shared(); emphasizedParser->Parse(text); @@ -92,7 +95,8 @@ TEST(MADDY_EMPHASIZEDPARSER, ItDoesntReplaceUnderscoresInsideCodeBlocks) TEST(MADDY_EMPHASIZEDPARSER, ItDoesNotReplaceUnderscoresInURLs) { std::string text = "[Link Title](http://example.com/what_you_didn't_know)"; - std::string expected = "[Link Title](http://example.com/what_you_didn't_know)"; + std::string expected = + "[Link Title](http://example.com/what_you_didn't_know)"; auto emphasizedParser = std::make_shared(); emphasizedParser->Parse(text); From 8e2c44c337719b082cb445157d00737522bd7dc0 Mon Sep 17 00:00:00 2001 From: Lucian Smith Date: Wed, 30 Apr 2025 16:36:31 -0700 Subject: [PATCH 5/6] A clang fix turned out to break stuff! 
--- include/maddy/strongparser.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/maddy/strongparser.h b/include/maddy/strongparser.h index 8a66d6e..a28d18b 100644 --- a/include/maddy/strongparser.h +++ b/include/maddy/strongparser.h @@ -58,10 +58,12 @@ class StrongParser : public LineParser // } // }; static std::vector res{ - std::regex{R"((?!.*`.*|.*.*)\*\*(?!.*`.*|.*<\/code>.*)" - "([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))"}, - std::regex{R"((?!.*`.*|.*.*)__(?!.*`.*|.*<\/code>.*)" - "([^__]*)__(?!.*`.*|.*<\/code>.*))"} + std::regex{ + R"((?!.*`.*|.*.*)\*\*(?!.*`.*|.*<\/code>.*)([^\*\*]*)\*\*(?!.*`.*|.*<\/code>.*))" + }, + std::regex{ + R"((?!.*`.*|.*.*)__(?!.*`.*|.*<\/code>.*)([^__]*)__(?!.*`.*|.*<\/code>.*))" + } }; static std::string replacement = "$1"; for (const auto& re : res) From 30d6cf9d7ab3fd2e62acd1e18073f29dfe7008e4 Mon Sep 17 00:00:00 2001 From: Lucian Smith Date: Wed, 30 Apr 2025 16:38:07 -0700 Subject: [PATCH 6/6] Fixed double negative phrasing. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25072a3..251add8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ maddy uses [semver versioning](https://semver.org/). ## Upcoming -* ![**FIXED**](https://img.shields.io/badge/-FIXED-%23090) Do not create emphasis tags not at word boundaries, i.e. `only_internal_underscores`. +* ![**FIXED**](https://img.shields.io/badge/-FIXED-%23090) Only create emphasis tags at word boundaries, i.e. `not only_internal_underscores`. * ... ## version 1.5.0 2025-04-21