From 017f97808cf59701e2d177da6dc18c1c36e02f96 Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Thu, 9 Apr 2020 17:19:49 -0400 Subject: [PATCH 01/12] YAML parse non-word characters as part of tags #2486 --- src/languages/yaml.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index d7586eb1f3..e504cd3de6 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -10,6 +10,9 @@ Category: common, config export default function(hljs) { var LITERALS = 'true false yes no null'; + // YAML spec allows non-reserved characters in tags + var YAML_TAG_RE = '[a-zA-Z_.~@#$%():;+=-?/]\\w*' + // Define keys as starting with a word character // ...containing word chars, spaces, colons, forward-slashes, hyphens and periods // ...and ending with a colon followed immediately by a space, tab or newline. @@ -81,11 +84,11 @@ export default function(hljs) { }, { // local tags className: 'type', - begin: '!' + hljs.UNDERSCORE_IDENT_RE, + begin: '!' + YAML_TAG_RE, }, { // data type className: 'type', - begin: '!!' + hljs.UNDERSCORE_IDENT_RE, + begin: '!!' 
+ YAML_TAG_RE, }, { // fragment id &ref className: 'meta', From 6a5668c0a903316546e4daacc1ac314fb7c7876f Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Thu, 9 Apr 2020 17:29:06 -0400 Subject: [PATCH 02/12] Add message for #2486 to CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 4f9602069c..5c7a228108 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -57,6 +57,7 @@ Language Improvements: - enh(plaintext) added `text` and `txt` as alias (#2360) [Taufik Nurrohman][] - enh(powershell) added PowerShell v5.1/v7 default aliases as "built_in"s (#2423) [Sean Williams][] - enh(yaml) added support for timestamps (#2475) [Peter Plantinga][] +- fix(yaml) Fix tags to include non-word characters (#2486) [Peter Plantinga][] Developer Tools: From bfcbbd88127ed3b8e158b0cc7b409da90fd962dd Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Thu, 9 Apr 2020 21:40:22 -0400 Subject: [PATCH 03/12] YAML add all characters for matching tags, plus tests --- src/languages/yaml.js | 8 ++------ test/markup/yaml/tag.expect.txt | 4 ++++ test/markup/yaml/tag.txt | 4 ++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index e504cd3de6..7ae32e6de6 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -11,7 +11,7 @@ export default function(hljs) { var LITERALS = 'true false yes no null'; // YAML spec allows non-reserved characters in tags - var YAML_TAG_RE = '[a-zA-Z_.~@#$%():;+=-?/]\\w*' + var YAML_TAG_RE = '[\w#;/?:@&=+$,_.!~*\'()[\]]+' // Define keys as starting with a word character // ...containing word chars, spaces, colons, forward-slashes, hyphens and periods @@ -82,14 +82,10 @@ export default function(hljs) { excludeEnd: true, relevance: 0 }, - { // local tags + { // tags className: 'type', begin: '!' + YAML_TAG_RE, }, - { // data type - className: 'type', - begin: '!!' 
+ YAML_TAG_RE, - }, { // fragment id &ref className: 'meta', begin: '&' + hljs.UNDERSCORE_IDENT_RE + '$', diff --git a/test/markup/yaml/tag.expect.txt b/test/markup/yaml/tag.expect.txt index dbc5645dcd..7dde36c684 100644 --- a/test/markup/yaml/tag.expect.txt +++ b/test/markup/yaml/tag.expect.txt @@ -2,3 +2,7 @@ key: !localtagname test key: "!notatag" key: '!!notatageither' +key: !!python/dict test +key: !!python/name:module.name test +key: !foo2.bar test +key: !(foo.bar?):tag test diff --git a/test/markup/yaml/tag.txt b/test/markup/yaml/tag.txt index 35f361543d..f186f91d28 100644 --- a/test/markup/yaml/tag.txt +++ b/test/markup/yaml/tag.txt @@ -2,3 +2,7 @@ key: !!builtintagname test key: !localtagname test key: "!notatag" key: '!!notatageither' +key: !!python/dict test +key: !!python/name:module.name test +key: !foo2.bar test +key: !(foo.bar?):tag test From 53ce6e02b12486ef2b00754121781c7fda5e2d47 Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Fri, 10 Apr 2020 11:46:22 -0400 Subject: [PATCH 04/12] YAML tags double escape characters --- src/languages/yaml.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index 7ae32e6de6..ed245a45ea 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -11,7 +11,7 @@ export default function(hljs) { var LITERALS = 'true false yes no null'; // YAML spec allows non-reserved characters in tags - var YAML_TAG_RE = '[\w#;/?:@&=+$,_.!~*\'()[\]]+' + var YAML_TAG_RE = '[\\w#;/?:@&=+$,.!~*\\\'()[\\]]+' // Define keys as starting with a word character // ...containing word chars, spaces, colons, forward-slashes, hyphens and periods From 1992e51b74537dfdf099f6a7097a7c0bd2f1fe96 Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Mon, 13 Apr 2020 18:31:37 -0400 Subject: [PATCH 05/12] YAML add back data type tags and disallow "!" 
in first character of tags --- src/languages/yaml.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index ed245a45ea..c8c4496cb8 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -11,7 +11,8 @@ export default function(hljs) { var LITERALS = 'true false yes no null'; // YAML spec allows non-reserved characters in tags - var YAML_TAG_RE = '[\\w#;/?:@&=+$,.!~*\\\'()[\\]]+' + var NON_EX_CHARS = '\\w#;/?:@&=+$,.~*\\\'()[\\]' + var YAML_TAG_RE = '[' + NON_EX_CHARS + '][' + NON_EX_CHARS + '!]*' // Define keys as starting with a word character // ...containing word chars, spaces, colons, forward-slashes, hyphens and periods @@ -82,10 +83,14 @@ export default function(hljs) { excludeEnd: true, relevance: 0 }, - { // tags + { // local tags className: 'type', begin: '!' + YAML_TAG_RE, }, + { // data type + className: 'type', + begin: '!!' + YAML_TAG_RE, + }, { // fragment id &ref className: 'meta', begin: '&' + hljs.UNDERSCORE_IDENT_RE + '$', From a14893b30d6d7a680f95c992c7a405cc4b13452c Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Tue, 14 Apr 2020 10:49:30 -0400 Subject: [PATCH 06/12] YAML clarify tag definition --- src/languages/yaml.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index c8c4496cb8..68dacee2b9 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -10,9 +10,12 @@ Category: common, config export default function(hljs) { var LITERALS = 'true false yes no null'; - // YAML spec allows non-reserved characters in tags - var NON_EX_CHARS = '\\w#;/?:@&=+$,.~*\\\'()[\\]' - var YAML_TAG_RE = '[' + NON_EX_CHARS + '][' + NON_EX_CHARS + '!]*' + // YAML spec allows non-reserved characters in tags, since they are intended + // to be used sometimes as URIs. 
In order to differentiate between "local" + // tags, prefixed with `!` and global tags, prefixed with `!!`, don't allow + // the prefix char `!` in the first char of the expression. + var NON_EXCLAMATION_CHARS = '\\w#;/?:@&=+$,.~*\\\'()[\\]' + var YAML_TAG_RE = '[' + NON_EXCLAMATION_CHARS + '][' + NON_EXCLAMATION_CHARS + '!]*' // Define keys as starting with a word character // ...containing word chars, spaces, colons, forward-slashes, hyphens and periods From 7e92cd0722a54017a210712a5945450658a03c00 Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Fri, 17 Apr 2020 12:51:05 -0400 Subject: [PATCH 07/12] Add named tag handles and clarify definition --- src/languages/yaml.js | 15 ++++++++------- test/markup/yaml/tag.expect.txt | 1 + test/markup/yaml/tag.txt | 1 + 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index 68dacee2b9..166ae8da6c 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -10,12 +10,9 @@ Category: common, config export default function(hljs) { var LITERALS = 'true false yes no null'; - // YAML spec allows non-reserved characters in tags, since they are intended - // to be used sometimes as URIs. In order to differentiate between "local" - // tags, prefixed with `!` and global tags, prefixed with `!!`, don't allow - // the prefix char `!` in the first char of the expression. - var NON_EXCLAMATION_CHARS = '\\w#;/?:@&=+$,.~*\\\'()[\\]' - var YAML_TAG_RE = '[' + NON_EXCLAMATION_CHARS + '][' + NON_EXCLAMATION_CHARS + '!]*' + // YAML spec allows most URI characters as a part of tags. + // see https://yaml.org/spec/1.2/spec.html#id2764295 + var YAML_TAG_RE = '[\\w#;/?:@&=+$,.~*\\\'()[\\]]+' // Define keys as starting with a word character // ...containing word chars, spaces, colons, forward-slashes, hyphens and periods @@ -90,10 +87,14 @@ export default function(hljs) { className: 'type', begin: '!' 
+ YAML_TAG_RE, }, - { // data type + { // "global" tags (data types) className: 'type', begin: '!!' + YAML_TAG_RE, }, + { // "named" tags + className: 'type', + begin: '!\w+!' + YAML_TAG_RE, + }, { // fragment id &ref className: 'meta', begin: '&' + hljs.UNDERSCORE_IDENT_RE + '$', diff --git a/test/markup/yaml/tag.expect.txt b/test/markup/yaml/tag.expect.txt index 7dde36c684..a4d51a4602 100644 --- a/test/markup/yaml/tag.expect.txt +++ b/test/markup/yaml/tag.expect.txt @@ -6,3 +6,4 @@ key: !!python/name:module.name test key: !foo2.bar test key: !(foo.bar?):tag test +key: !foo!bar test diff --git a/test/markup/yaml/tag.txt b/test/markup/yaml/tag.txt index f186f91d28..f592c36b97 100644 --- a/test/markup/yaml/tag.txt +++ b/test/markup/yaml/tag.txt @@ -6,3 +6,4 @@ key: !!python/dict test key: !!python/name:module.name test key: !foo2.bar test key: !(foo.bar?):tag test +key: !foo!bar test From 3466812780ff35a43dc0acac870b060477fcefa8 Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Fri, 17 Apr 2020 13:03:39 -0400 Subject: [PATCH 08/12] YAML fix order of applying regexes for tags --- src/languages/yaml.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index 166ae8da6c..69b1379307 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -83,17 +83,17 @@ export default function(hljs) { excludeEnd: true, relevance: 0 }, - { // local tags + { // "named" tags className: 'type', - begin: '!' + YAML_TAG_RE, + begin: '!\w+!' + YAML_TAG_RE, }, { // "global" tags (data types) className: 'type', begin: '!!' + YAML_TAG_RE, }, - { // "named" tags + { // local tags className: 'type', - begin: '!\w+!' + YAML_TAG_RE, + begin: '!' 
+ YAML_TAG_RE, }, { // fragment id &ref className: 'meta', From 79393229776290346e07d353ab1a93764ca544ac Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Fri, 17 Apr 2020 13:11:34 -0400 Subject: [PATCH 09/12] Revert adding named tag handles, since they are captured by the local tag handles --- src/languages/yaml.js | 19 +++++++++---------- test/markup/yaml/tag.expect.txt | 1 - test/markup/yaml/tag.txt | 1 - 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index 69b1379307..68dacee2b9 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -10,9 +10,12 @@ Category: common, config export default function(hljs) { var LITERALS = 'true false yes no null'; - // YAML spec allows most URI characters as a part of tags. - // see https://yaml.org/spec/1.2/spec.html#id2764295 - var YAML_TAG_RE = '[\\w#;/?:@&=+$,.~*\\\'()[\\]]+' + // YAML spec allows non-reserved characters in tags, since they are intended + // to be used sometimes as URIs. In order to differentiate between "local" + // tags, prefixed with `!` and global tags, prefixed with `!!`, don't allow + // the prefix char `!` in the first char of the expression. + var NON_EXCLAMATION_CHARS = '\\w#;/?:@&=+$,.~*\\\'()[\\]' + var YAML_TAG_RE = '[' + NON_EXCLAMATION_CHARS + '][' + NON_EXCLAMATION_CHARS + '!]*' // Define keys as starting with a word character // ...containing word chars, spaces, colons, forward-slashes, hyphens and periods @@ -83,18 +86,14 @@ export default function(hljs) { excludeEnd: true, relevance: 0 }, - { // "named" tags + { // local tags className: 'type', - begin: '!\w+!' + YAML_TAG_RE, + begin: '!' + YAML_TAG_RE, }, - { // "global" tags (data types) + { // data type className: 'type', begin: '!!' + YAML_TAG_RE, }, - { // local tags - className: 'type', - begin: '!' 
+ YAML_TAG_RE, - }, { // fragment id &ref className: 'meta', begin: '&' + hljs.UNDERSCORE_IDENT_RE + '$', diff --git a/test/markup/yaml/tag.expect.txt b/test/markup/yaml/tag.expect.txt index a4d51a4602..7dde36c684 100644 --- a/test/markup/yaml/tag.expect.txt +++ b/test/markup/yaml/tag.expect.txt @@ -6,4 +6,3 @@ key: !!python/name:module.name test key: !foo2.bar test key: !(foo.bar?):tag test -key: !foo!bar test diff --git a/test/markup/yaml/tag.txt b/test/markup/yaml/tag.txt index f592c36b97..f186f91d28 100644 --- a/test/markup/yaml/tag.txt +++ b/test/markup/yaml/tag.txt @@ -6,4 +6,3 @@ key: !!python/dict test key: !!python/name:module.name test key: !foo2.bar test key: !(foo.bar?):tag test -key: !foo!bar test From 6ca73145493fc4e15816a8d219d17f8d08349b68 Mon Sep 17 00:00:00 2001 From: Peter Plantinga Date: Sat, 18 Apr 2020 14:15:52 -0400 Subject: [PATCH 10/12] Add named tag again --- src/languages/yaml.js | 16 ++++++++-------- test/markup/yaml/tag.expect.txt | 1 + test/markup/yaml/tag.txt | 1 + 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index 68dacee2b9..79ae69f7ba 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -10,12 +10,8 @@ Category: common, config export default function(hljs) { var LITERALS = 'true false yes no null'; - // YAML spec allows non-reserved characters in tags, since they are intended - // to be used sometimes as URIs. In order to differentiate between "local" - // tags, prefixed with `!` and global tags, prefixed with `!!`, don't allow - // the prefix char `!` in the first char of the expression. - var NON_EXCLAMATION_CHARS = '\\w#;/?:@&=+$,.~*\\\'()[\\]' - var YAML_TAG_RE = '[' + NON_EXCLAMATION_CHARS + '][' + NON_EXCLAMATION_CHARS + '!]*' + // YAML spec allows non-reserved URI characters in tags. 
+ var URI_CHARACTERS = '[\\w#;/?:@&=+$,.~*\\\'()[\\]]+' // Define keys as starting with a word character // ...containing word chars, spaces, colons, forward-slashes, hyphens and periods @@ -86,13 +82,17 @@ export default function(hljs) { excludeEnd: true, relevance: 0 }, + { // named tags + className: 'type', + begin: '!\\w+!' + URI_CHARACTERS, + }, { // local tags className: 'type', - begin: '!' + YAML_TAG_RE, + begin: '!' + URI_CHARACTERS, }, { // data type className: 'type', - begin: '!!' + YAML_TAG_RE, + begin: '!!' + URI_CHARACTERS, }, { // fragment id &ref className: 'meta', diff --git a/test/markup/yaml/tag.expect.txt b/test/markup/yaml/tag.expect.txt index 7dde36c684..7253fa5769 100644 --- a/test/markup/yaml/tag.expect.txt +++ b/test/markup/yaml/tag.expect.txt @@ -6,3 +6,4 @@ key: !!python/name:module.name test key: !foo2.bar test key: !(foo.bar?):tag test +key: !named!tag test diff --git a/test/markup/yaml/tag.txt b/test/markup/yaml/tag.txt index f186f91d28..7a11d86b05 100644 --- a/test/markup/yaml/tag.txt +++ b/test/markup/yaml/tag.txt @@ -6,3 +6,4 @@ key: !!python/dict test key: !!python/name:module.name test key: !foo2.bar test key: !(foo.bar?):tag test +key: !named!tag test From 05471b9feb09e2c72b90f43e65a54df2088e3b58 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 21 Apr 2020 19:00:15 -0400 Subject: [PATCH 11/12] fixup comments, add verbatim tags --- src/languages/yaml.js | 9 +++++++-- test/markup/yaml/tag.expect.txt | 3 +++ test/markup/yaml/tag.txt | 3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index 79ae69f7ba..4fc4ba0a28 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -86,11 +86,16 @@ export default function(hljs) { className: 'type', begin: '!\\w+!' 
+ URI_CHARACTERS, }, - { // local tags + // https://yaml.org/spec/1.2/spec.html#id2784064 + { // verbatim tags + className: 'type', + begin: '!<' + URI_CHARACTERS + ">", + }, + { // primary tags className: 'type', begin: '!' + URI_CHARACTERS, }, - { // data type + { // secondary tags className: 'type', begin: '!!' + URI_CHARACTERS, }, diff --git a/test/markup/yaml/tag.expect.txt b/test/markup/yaml/tag.expect.txt index 7253fa5769..87168452e3 100644 --- a/test/markup/yaml/tag.expect.txt +++ b/test/markup/yaml/tag.expect.txt @@ -7,3 +7,6 @@ key: !foo2.bar test key: !(foo.bar?):tag test key: !named!tag test + +--- !<tag:clarkevans.com,2002:invoice> +invoice: 34843 diff --git a/test/markup/yaml/tag.txt b/test/markup/yaml/tag.txt index 7a11d86b05..20ee84a731 100644 --- a/test/markup/yaml/tag.txt +++ b/test/markup/yaml/tag.txt @@ -7,3 +7,6 @@ key: !!python/name:module.name test key: !foo2.bar test key: !(foo.bar?):tag test key: !named!tag test + +--- !<tag:clarkevans.com,2002:invoice> +invoice: 34843 From 677bf3a40b8942b4ae1ca21f8045b6250dc8f1f3 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 27 Apr 2020 01:16:51 -0400 Subject: [PATCH 12/12] Update CHANGES.md --- CHANGES.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 22f13ee61e..f77c7baadb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +## Version 10.1.0 (in progress) + +Language improvements: + +- fix(yaml) Fix tags to include non-word characters (#2486) [Peter Plantinga][] + +[Peter Plantinga]: https://github.com/pplantinga + + ## Version 10.0.1 Parser Engine Changes: @@ -66,7 +75,6 @@ Language Improvements: - enh(plaintext) added `text` and `txt` as alias (#2360) [Taufik Nurrohman][] - enh(powershell) added PowerShell v5.1/v7 default aliases as "built_in"s (#2423) [Sean Williams][] - enh(yaml) added support for timestamps (#2475) [Peter Plantinga][] -- fix(yaml) Fix tags to include non-word characters (#2486) [Peter Plantinga][] Developer Tools: