From ce247a051abca2141dc37ae2cc9a49b4da79e1fe Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 29 Oct 2019 10:10:33 -0400 Subject: [PATCH 1/3] Fix regex look-ahead for end matchers Closes #2216. --- src/highlight.js | 3 ++- test/parser/index.js | 1 + test/parser/look-ahead-end-matchers.js | 31 ++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 test/parser/look-ahead-end-matchers.js diff --git a/src/highlight.js b/src/highlight.js index 834f6ff5fe..fe6f99b3db 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -608,7 +608,8 @@ https://highlightjs.org/ function doEndMatch(match) { var lexeme = match[0]; - var end_mode = endOfMode(top, lexeme); + var matchPlusRemainder = value.substr(match.index); + var end_mode = endOfMode(top, matchPlusRemainder); if (!end_mode) { return; } var origin = top; diff --git a/test/parser/index.js b/test/parser/index.js index 3dc23c1290..1472a03191 100644 --- a/test/parser/index.js +++ b/test/parser/index.js @@ -3,4 +3,5 @@ describe('hljs', function() { require('./reuse-endsWithParent'); require('./should-not-destroyData'); + require('./look-ahead-end-matchers'); }); diff --git a/test/parser/look-ahead-end-matchers.js b/test/parser/look-ahead-end-matchers.js new file mode 100644 index 0000000000..241e1fd0f6 --- /dev/null +++ b/test/parser/look-ahead-end-matchers.js @@ -0,0 +1,31 @@ +const hljs = require('../../build'); + +describe("parser specifics", function () { + + // CONTEXT: https://github.com/highlightjs/highlight.js/pull/2219 + describe("a grammar with look-ahead end matchers", () => { + it("should match successfully", () => { + hljs.registerLanguage('test-language', (hljs) => { + + // broken regex from old Fortran ruleset + const PATTERN = { + className: "pattern", + begin: '[A-Z]{3}', + // followed by at least one space + end: '\\d{3}(?=\\s+)' + } + + return { + contains: [PATTERN] + }; + }); + + hljs.highlight('test-language', 'ABC123 is the secret. XYZ123. 
End of string: ABC123').value + .should.equal( + // one full match at beginning, other match begins with XYZ but then never terminates, + // so the end of the parsing finally closes the span tag + '<span class="hljs-pattern">ABC123</span> is the secret. <span class="hljs-pattern">XYZ123. End of string: ABC123</span>' + ) + }) + }) +}) From 2ac217ac6d5162754f3ec91bc1d7d01791eb2c11 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 29 Oct 2019 10:16:56 -0400 Subject: [PATCH 2/3] add changelog --- CHANGES.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 018992dfe3..4e0c278b3b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,16 @@ +## Version [master] + +New languages: + none. + +New styles: + none. + +Improvements: +- fix(parser): Look-ahead regex now work for end matches also (#2237) [Josh Goebel][] + +[Josh Goebel]: https://github.com/yyyc514 + ## Version 9.16.0 New languages: From 7c00276841e861b4859605bff9cf55d314a104dc Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 29 Oct 2019 10:19:11 -0400 Subject: [PATCH 3/3] update docs --- docs/language-guide.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/language-guide.rst b/docs/language-guide.rst index 6e598ab8ca..18c176f676 100644 --- a/docs/language-guide.rst +++ b/docs/language-guide.rst @@ -265,13 +265,14 @@ The goal of Highlight.js is to support whatever regex features Javascript itself Things we support now that we did not always: -* look-ahead matching for `begin` (#2135) -* look-ahead matching for `illegal` (#2135) -* back-references within your regex (#1897) +* look-ahead regex matching for `begin` (#2135) +* look-ahead regex matching for `end` (#2237) +* look-ahead regex matching for `illegal` (#2135) +* back-references within your regex matches (#1897) +* look-behind matching (when JS supports it) for `begin` (#2135) Things we currently know are still issues: -* look-ahead matching for `end` matchers * look-behind matching (when JS supports it) for `end` matchers