diff --git a/CHANGES.md b/CHANGES.md index d34ad46f26..0f3c7d6f78 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,7 @@ New styles: none. Improvements: +- fix(parser): Look-ahead regex now works for end matches also (#2237) [Josh Goebel][] - fix(parser): Better errors when a language is missing (#2236) [Josh Goebel][] [Josh Goebel]: https://github.com/yyyc514 diff --git a/docs/language-guide.rst b/docs/language-guide.rst index 6e598ab8ca..18c176f676 100644 --- a/docs/language-guide.rst +++ b/docs/language-guide.rst @@ -265,13 +265,14 @@ The goal of Highlight.js is to support whatever regex features Javascript itself Things we support now that we did not always: -* look-ahead matching for `begin` (#2135) -* look-ahead matching for `illegal` (#2135) -* back-references within your regex (#1897) +* look-ahead regex matching for `begin` (#2135) +* look-ahead regex matching for `end` (#2237) +* look-ahead regex matching for `illegal` (#2135) +* back-references within your regex matches (#1897) +* look-behind matching (when JS supports it) for `begin` (#2135) Things we currently know are still issues: -* look-ahead matching for `end` matchers * look-behind matching (when JS supports it) for `end` matchers diff --git a/src/highlight.js b/src/highlight.js index 3a460b96db..b43b952247 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -614,7 +614,8 @@ https://highlightjs.org/ function doEndMatch(match) { var lexeme = match[0]; - var end_mode = endOfMode(top, lexeme); + var matchPlusRemainder = value.substr(match.index); + var end_mode = endOfMode(top, matchPlusRemainder); if (!end_mode) { return; } var origin = top; diff --git a/test/parser/index.js b/test/parser/index.js index 3dc23c1290..1472a03191 100644 --- a/test/parser/index.js +++ b/test/parser/index.js @@ -3,4 +3,5 @@ describe('hljs', function() { require('./reuse-endsWithParent'); require('./should-not-destroyData'); + require('./look-ahead-end-matchers'); }); diff --git 
a/test/parser/look-ahead-end-matchers.js b/test/parser/look-ahead-end-matchers.js
new file mode 100644
index 0000000000..241e1fd0f6
--- /dev/null
+++ b/test/parser/look-ahead-end-matchers.js
@@ -0,0 +1,33 @@
+const hljs = require('../../build');
+
+describe("parser specifics", function () {
+
+  // CONTEXT: https://github.com/highlightjs/highlight.js/pull/2219
+  // Regression test for #2237: an `end` pattern that finishes with a
+  // look-ahead assertion must still be able to close its mode.
+  describe("a grammar with look-ahead end matchers", () => {
+    it("should match successfully", () => {
+      hljs.registerLanguage('test-language', (hljs) => {
+
+        // broken regex from old Fortran ruleset
+        const PATTERN = {
+          className: "pattern",
+          begin: '[A-Z]{3}',
+          // three digits followed by at least one whitespace character
+          end: '\\d{3}(?=\\s+)'
+        };
+
+        return {
+          contains: [PATTERN]
+        };
+      });
+
+      hljs.highlight('test-language', 'ABC123 is the secret. XYZ123. End of string: ABC123').value
+        .should.equal(
+          // one full match at beginning, other match begins with XYZ but then never terminates,
+          // so the end of the parsing finally closes the span tag
+          '<span class="hljs-pattern">ABC123</span> is the secret. <span class="hljs-pattern">XYZ123. End of string: ABC123</span>'
+        );
+    });
+  });
+});