diff --git a/CHANGES.md b/CHANGES.md
index 4a00b76655..ce1b83a012 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,11 +1,13 @@
 ## Master
 
 New languages:
+  none.
 
 New styles:
 - *Night Owl* by [Carl Baxter][]
 
 Improvements:
+- improve parser to properly support look-ahead regex in begin matchers (#2135)
 - blacklist super-common keywords from having relevance (#2179)
 - fix(swift): support for `@dynamicMemberLookup` and `@propertyWrapper` (#2202)
 - fix: `endWithParent` inside `starts` now always works (#2201)
diff --git a/docs/language-guide.rst b/docs/language-guide.rst
index a3cf8d806a..6e598ab8ca 100644
--- a/docs/language-guide.rst
+++ b/docs/language-guide.rst
@@ -258,6 +258,23 @@ Many languages share common modes and regular expressions. Such expressions are
 at the end under "Common regexps" and "Common modes" titles. Use them when possible.
 
 
+Regular Expression Features
+---------------------------
+
+The goal of Highlight.js is to support whatever regex features JavaScript itself supports.  You're using real regular expressions, so use them responsibly.  That said, due to the design of the parser, there are some caveats.  These are addressed below.
+
+Things we support now that we did not always:
+
+* look-ahead matching for ``begin`` (#2135)
+* look-ahead matching for ``illegal`` (#2135)
+* back-references within your regex (#1897)
+
+Things we currently know are still issues:
+
+* look-ahead matching for ``end`` matchers
+* look-behind matching (when JS supports it) for ``end`` matchers
+
+
 Contributing
 ------------
 
diff --git a/src/highlight.js b/src/highlight.js
index 8464b65f62..fa8e7c3895 100644
--- a/src/highlight.js
+++ b/src/highlight.js
@@ -313,8 +313,15 @@ https://highlightjs.org/
       );
     }
 
+    function reCountMatchGroups(re) {
+      return (new RegExp(re.toString() + '|')).exec('').length - 1;
+    }
+
     // joinRe logically computes regexps.join(separator), but fixes the
     // backreferences so they continue to match.
+    // It also places each individual regular expression into its own
+    // match group. Keeping track of the sequencing of those match groups
+    // is currently an exercise for the caller. :-)
     function joinRe(regexps, separator) {
       // backreferenceRe matches an open parenthesis or backreference. To avoid
       // an incorrect parse, it additionally matches the following:
@@ -327,11 +334,13 @@ https://highlightjs.org/
       var numCaptures = 0;
       var ret = '';
       for (var i = 0; i < regexps.length; i++) {
+        numCaptures += 1;
         var offset = numCaptures;
         var re = reStr(regexps[i]);
         if (i > 0) {
           ret += separator;
         }
+        ret += "(";
         while (re.length > 0) {
           var match = backreferenceRe.exec(re);
           if (match == null) {
@@ -350,10 +359,75 @@ https://highlightjs.org/
             }
           }
         }
+        ret += ")";
       }
       return ret;
     }
 
+    function buildModeRegex(mode) {
+
+      var matchIndexes = {};
+      var matcherRe;
+      var regexes = [];
+      var matcher = {};
+      var matchAt = 1;
+
+      function addRule(rule, regex) {
+        matchIndexes[matchAt] = rule;
+        regexes.push([rule, regex]);
+        matchAt += reCountMatchGroups(regex) + 1;
+      }
+
+      var term;
+      for (var i=0; i < mode.contains.length; i++) {
+        var re;
+        term = mode.contains[i];
+        if (term.beginKeywords) {
+          re = '\\.?(?:' + term.begin + ')\\.?';
+        } else {
+          re = term.begin;
+        }
+        addRule(term, re);
+      }
+      if (mode.terminator_end)
+        addRule("end", mode.terminator_end);
+      if (mode.illegal)
+        addRule("illegal", mode.illegal);
+
+      var terminators = regexes.map(function(el) { return el[1] });
+      matcherRe = langRe(joinRe(terminators, '|'), true);
+
+      matcher.lastIndex = 0;
+      matcher.exec = function(s) {
+        var rule;
+
+        if( regexes.length === 0) return null;
+
+        matcherRe.lastIndex = matcher.lastIndex;
+        var match = matcherRe.exec(s);
+        if (!match) { return null; }
+
+        for(var i = 0; i<match.length; i++) {
+          if (match[i] != undefined && matchIndexes["" +i] != undefined ) {
+            rule = matchIndexes[""+i];
+            break;
+          }
+        }
+
+        // illegal or end match
+        if (typeof rule === "string") {
+          match.type = rule;
+          match.extra = [mode.illegal, mode.terminator_end];
+        } else {
+          match.type = "begin";
+          match.rule = rule;
+        }
+        return match;
+      }
+
+      return matcher;
+    }
+
     function compileMode(mode, parent) {
       if (mode.compiled)
         return;
@@ -398,14 +472,7 @@ https://highlightjs.org/
         compileMode(mode.starts, parent);
       }
 
-      var terminators =
-        mode.contains.map(function(c) {
-          return c.beginKeywords ? '\\.?(?:' + c.begin + ')\\.?' : c.begin;
-        })
-        .concat([mode.terminator_end, mode.illegal])
-        .map(reStr)
-        .filter(Boolean);
-      mode.terminators = terminators.length ? langRe(joinRe(terminators, '|'), true) : {exec: function(/*s*/) {return null;}};
+      mode.terminators = buildModeRegex(mode);
     }
 
     compileMode(language);
@@ -426,19 +493,6 @@ https://highlightjs.org/
       return new RegExp(value.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'), 'm');
     }
 
-    function subMode(lexeme, mode) {
-      var i, length;
-
-      for (i = 0, length = mode.contains.length; i < length; i++) {
-        if (testRe(mode.contains[i].beginRe, lexeme)) {
-          if (mode.contains[i].endSameAsBegin) {
-            mode.contains[i].endRe = escapeRe( mode.contains[i].beginRe.exec(lexeme)[0] );
-          }
-          return mode.contains[i];
-        }
-      }
-    }
-
     function endOfMode(mode, lexeme) {
       if (testRe(mode.endRe, lexeme)) {
         while (mode.endsParent && mode.parent) {
@@ -451,10 +505,6 @@ https://highlightjs.org/
       }
     }
 
-    function isIllegal(lexeme, mode) {
-      return !ignore_illegals && testRe(mode.illegalRe, lexeme);
-    }
-
     function keywordMatch(mode, match) {
       var match_str = language.case_insensitive ? match[0].toLowerCase() : match[0];
       return mode.keywords.hasOwnProperty(match_str) && mode.keywords[match_str];
@@ -532,74 +582,112 @@ https://highlightjs.org/
       top = Object.create(mode, {parent: {value: top}});
     }
 
-    function processLexeme(buffer, lexeme) {
 
-      mode_buffer += buffer;
+    function doBeginMatch(match) {
+      var lexeme = match[0];
+      var new_mode = match.rule;
 
-      if (lexeme == null) {
-        processBuffer();
-        return 0;
+      if (new_mode && new_mode.endSameAsBegin) {
+        new_mode.endRe = escapeRe( lexeme );
       }
 
-      var new_mode = subMode(lexeme, top);
-      if (new_mode) {
-        if (new_mode.skip) {
+      if (new_mode.skip) {
+        mode_buffer += lexeme;
+      } else {
+        if (new_mode.excludeBegin) {
           mode_buffer += lexeme;
-        } else {
-          if (new_mode.excludeBegin) {
-            mode_buffer += lexeme;
-          }
-          processBuffer();
-          if (!new_mode.returnBegin && !new_mode.excludeBegin) {
-            mode_buffer = lexeme;
-          }
         }
-        startNewMode(new_mode, lexeme);
-        return new_mode.returnBegin ? 0 : lexeme.length;
+        processBuffer();
+        if (!new_mode.returnBegin && !new_mode.excludeBegin) {
+          mode_buffer = lexeme;
+        }
       }
+      startNewMode(new_mode, lexeme);
+      return new_mode.returnBegin ? 0 : lexeme.length;
+    }
 
+    function doEndMatch(match) {
+      var lexeme = match[0];
       var end_mode = endOfMode(top, lexeme);
-      if (end_mode) {
-        var origin = top;
-        if (origin.skip) {
+      if (!end_mode) { return; }
+
+      var origin = top;
+      if (origin.skip) {
+        mode_buffer += lexeme;
+      } else {
+        if (!(origin.returnEnd || origin.excludeEnd)) {
           mode_buffer += lexeme;
-        } else {
-          if (!(origin.returnEnd || origin.excludeEnd)) {
-            mode_buffer += lexeme;
-          }
-          processBuffer();
-          if (origin.excludeEnd) {
-            mode_buffer = lexeme;
-          }
         }
-        do {
-          if (top.className) {
-            result += spanEndTag;
-          }
-          if (!top.skip && !top.subLanguage) {
-            relevance += top.relevance;
-          }
-          top = top.parent;
-        } while (top !== end_mode.parent);
-        if (end_mode.starts) {
-          if (end_mode.endSameAsBegin) {
-            end_mode.starts.endRe = end_mode.endRe;
-          }
-          startNewMode(end_mode.starts, '');
+        processBuffer();
+        if (origin.excludeEnd) {
+          mode_buffer = lexeme;
         }
-        return origin.returnEnd ? 0 : lexeme.length;
       }
+      do {
+        if (top.className) {
+          result += spanEndTag;
+        }
+        if (!top.skip && !top.subLanguage) {
+          relevance += top.relevance;
+        }
+        top = top.parent;
+      } while (top !== end_mode.parent);
+      if (end_mode.starts) {
+        if (end_mode.endSameAsBegin) {
+          end_mode.starts.endRe = end_mode.endRe;
+        }
+        startNewMode(end_mode.starts, '');
+      }
+      return origin.returnEnd ? 0 : lexeme.length;
+    }
 
-      if (isIllegal(lexeme, top))
+    var lastMatch = {};
+    function processLexeme(text_before_match, match) {
+
+      var lexeme = match && match[0];
+
+      // add non-matched text to the current mode buffer
+      mode_buffer += text_before_match;
+
+      if (lexeme == null) {
+        processBuffer();
+        return 0;
+      }
+
+      // we've found a 0 width match and we're stuck, so we need to advance
+      // this happens when we have badly behaved rules that have optional matchers to the degree that
+      // sometimes they can end up matching nothing at all
+      // Ref: https://github.com/highlightjs/highlight.js/issues/2140
+      if (lastMatch.type=="begin" && match.type=="end" && lastMatch.index == match.index && lexeme === "") {
+        return 1;
+      }
+      lastMatch = match;
+
+      if (match.type==="begin") {
+        return doBeginMatch(match);
+      } else if (match.type==="illegal" && !ignore_illegals) {
+        // illegal match, we do not continue processing
         throw new Error('Illegal lexeme "' + lexeme + '" for mode "' + (top.className || '<unnamed>') + '"');
+      } else if (match.type==="end") {
+        var processed = doEndMatch(match);
+        if (processed != undefined)
+          return processed;
+      }
 
       /*
-      Parser should not reach this point as all types of lexemes should be caught
-      earlier, but if it does due to some bug make sure it advances at least one
-      character forward to prevent infinite looping.
+      Why might we find ourselves here?  Only one occasion now.  An end match that was
+      triggered but could not be completed.  When might this happen?  When an `endSameAsBegin`
+      rule sets the end rule to a specific match.  Since the overall mode termination rule that's
+      being used to scan the text isn't recompiled that means that any match that LOOKS like
+      the end (but is not, because it is not an exact match to the beginning) will
+      end up here.  A definite end match, but when `doEndMatch` tries to "reapply"
+      the end rule and fails to match, we wind up here, and just silently ignore the end.
+
+      This causes no real harm other than stopping a few times too many.
       */
+
       mode_buffer += lexeme;
-      return lexeme.length || 1;
+      return lexeme.length;
     }
 
     var language = getLanguage(name);
@@ -625,7 +713,7 @@ https://highlightjs.org/
         match = top.terminators.exec(value);
         if (!match)
           break;
-        count = processLexeme(value.substring(index, match.index), match[0]);
+        count = processLexeme(value.substring(index, match.index), match);
         index = match.index + count;
       }
       processLexeme(value.substr(index));
@@ -637,12 +725,14 @@ https://highlightjs.org/
       return {
         relevance: relevance,
         value: result,
+        illegal:false,
         language: name,
         top: top
       };
     } catch (e) {
       if (e.message && e.message.indexOf('Illegal') !== -1) {
         return {
+          illegal: true,
           relevance: 0,
           value: escape(value)
         };
diff --git a/src/languages/abnf.js b/src/languages/abnf.js
index ca7b8084ce..5b0f5d4030 100644
--- a/src/languages/abnf.js
+++ b/src/languages/abnf.js
@@ -52,11 +52,8 @@ function(hljs) {
     };
 
     var ruleDeclarationMode = {
-        begin: regexes.ruleDeclaration + '\\s*=',
-        returnBegin: true,
-        end: /=/,
-        relevance: 0,
-        contains: [{className: "attribute", begin: regexes.ruleDeclaration}]
+        className: "attribute",
+        begin: regexes.ruleDeclaration + '(?=\\s*=)',
     };
 
     return {
diff --git a/src/languages/brainfuck.js b/src/languages/brainfuck.js
index c9c2755d70..e4e87c4c2e 100644
--- a/src/languages/brainfuck.js
+++ b/src/languages/brainfuck.js
@@ -33,7 +33,7 @@ function(hljs){
       },
       {
         // this mode works as the only relevance counter
-        begin: /\+\+|\-\-/, returnBegin: true,
+        begin: /(?:\+\+|\-\-)/,
         contains: [LITERAL]
       },
       LITERAL
diff --git a/src/languages/coffeescript.js b/src/languages/coffeescript.js
index 7cbbe52834..d1a6361749 100644
--- a/src/languages/coffeescript.js
+++ b/src/languages/coffeescript.js
@@ -67,7 +67,7 @@ function(hljs) {
         {
           // regex can't start with space to parse x / 2 / 3 as two divisions
           // regex can't start with *, and it supports an "illegal" in the main mode
-          begin: /\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)/
+          begin: /\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W)/
         }
       ]
     },
diff --git a/src/languages/livescript.js b/src/languages/livescript.js
index f8731f2037..4584ef82e2 100644
--- a/src/languages/livescript.js
+++ b/src/languages/livescript.js
@@ -81,7 +81,7 @@ function(hljs) {
         {
           // regex can't start with space to parse x / 2 / 3 as two divisions
           // regex can't start with *, and it supports an "illegal" in the main mode
-          begin: /\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)/
+          begin: /\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W)/
         }
       ]
     },
diff --git a/src/languages/stata.js b/src/languages/stata.js
index 6be20a7b88..f5e2a2411d 100644
--- a/src/languages/stata.js
+++ b/src/languages/stata.js
@@ -37,7 +37,7 @@ function(hljs) {
         className: 'built_in',
         variants: [
           {
-            begin: '\\b(abs|acos|asin|atan|atan2|atanh|ceil|cloglog|comb|cos|digamma|exp|floor|invcloglog|invlogit|ln|lnfact|lnfactorial|lngamma|log|log10|max|min|mod|reldif|round|sign|sin|sqrt|sum|tan|tanh|trigamma|trunc|betaden|Binomial|binorm|binormal|chi2|chi2tail|dgammapda|dgammapdada|dgammapdadx|dgammapdx|dgammapdxdx|F|Fden|Ftail|gammaden|gammap|ibeta|invbinomial|invchi2|invchi2tail|invF|invFtail|invgammap|invibeta|invnchi2|invnFtail|invnibeta|invnorm|invnormal|invttail|nbetaden|nchi2|nFden|nFtail|nibeta|norm|normal|normalden|normd|npnchi2|tden|ttail|uniform|abbrev|char|index|indexnot|length|lower|ltrim|match|plural|proper|real|regexm|regexr|regexs|reverse|rtrim|string|strlen|strlower|strltrim|strmatch|strofreal|strpos|strproper|strreverse|strrtrim|strtrim|strupper|subinstr|subinword|substr|trim|upper|word|wordcount|_caller|autocode|byteorder|chop|clip|cond|e|epsdouble|epsfloat|group|inlist|inrange|irecode|matrix|maxbyte|maxdouble|maxfloat|maxint|maxlong|mi|minbyte|mindouble|minfloat|minint|minlong|missing|r|recode|replay|return|s|scalar|d|date|day|dow|doy|halfyear|mdy|month|quarter|week|year|d|daily|dofd|dofh|dofm|dofq|dofw|dofy|h|halfyearly|hofd|m|mofd|monthly|q|qofd|quarterly|tin|twithin|w|weekly|wofd|y|yearly|yh|ym|yofd|yq|yw|cholesky|colnumb|colsof|corr|det|diag|diag0cnt|el|get|hadamard|I|inv|invsym|issym|issymmetric|J|matmissing|matuniform|mreldif|nullmat|rownumb|rowsof|sweep|syminv|trace|vec|vecdiag)(?=\\(|$)'
+            begin: '\\b(abs|acos|asin|atan|atan2|atanh|ceil|cloglog|comb|cos|digamma|exp|floor|invcloglog|invlogit|ln|lnfact|lnfactorial|lngamma|log|log10|max|min|mod|reldif|round|sign|sin|sqrt|sum|tan|tanh|trigamma|trunc|betaden|Binomial|binorm|binormal|chi2|chi2tail|dgammapda|dgammapdada|dgammapdadx|dgammapdx|dgammapdxdx|F|Fden|Ftail|gammaden|gammap|ibeta|invbinomial|invchi2|invchi2tail|invF|invFtail|invgammap|invibeta|invnchi2|invnFtail|invnibeta|invnorm|invnormal|invttail|nbetaden|nchi2|nFden|nFtail|nibeta|norm|normal|normalden|normd|npnchi2|tden|ttail|uniform|abbrev|char|index|indexnot|length|lower|ltrim|match|plural|proper|real|regexm|regexr|regexs|reverse|rtrim|string|strlen|strlower|strltrim|strmatch|strofreal|strpos|strproper|strreverse|strrtrim|strtrim|strupper|subinstr|subinword|substr|trim|upper|word|wordcount|_caller|autocode|byteorder|chop|clip|cond|e|epsdouble|epsfloat|group|inlist|inrange|irecode|matrix|maxbyte|maxdouble|maxfloat|maxint|maxlong|mi|minbyte|mindouble|minfloat|minint|minlong|missing|r|recode|replay|return|s|scalar|d|date|day|dow|doy|halfyear|mdy|month|quarter|week|year|d|daily|dofd|dofh|dofm|dofq|dofw|dofy|h|halfyearly|hofd|m|mofd|monthly|q|qofd|quarterly|tin|twithin|w|weekly|wofd|y|yearly|yh|ym|yofd|yq|yw|cholesky|colnumb|colsof|corr|det|diag|diag0cnt|el|get|hadamard|I|inv|invsym|issym|issymmetric|J|matmissing|matuniform|mreldif|nullmat|rownumb|rowsof|sweep|syminv|trace|vec|vecdiag)(?=\\()'
           }
         ]
       },
diff --git a/src/languages/stylus.js b/src/languages/stylus.js
index 7b1f423c6e..ff794862d3 100644
--- a/src/languages/stylus.js
+++ b/src/languages/stylus.js
@@ -122,7 +122,7 @@ function(hljs) {
     'video'
   ];
 
-  var TAG_END = '[\\.\\s\\n\\[\\:,]';
+  var LOOKAHEAD_TAG_END = '(?=[\\.\\s\\n\\[\\:,])';
 
   var ATTRIBUTES = [
     'align-content',
@@ -365,34 +365,25 @@ function(hljs) {
 
       // class tag
       {
-        begin: '\\.[a-zA-Z][a-zA-Z0-9_-]*' + TAG_END,
-        returnBegin: true,
-        contains: [
-          {className: 'selector-class', begin: '\\.[a-zA-Z][a-zA-Z0-9_-]*'}
-        ]
+        begin: '\\.[a-zA-Z][a-zA-Z0-9_-]*' + LOOKAHEAD_TAG_END,
+        className: 'selector-class'
       },
 
       // id tag
       {
-        begin: '\\#[a-zA-Z][a-zA-Z0-9_-]*' + TAG_END,
-        returnBegin: true,
-        contains: [
-          {className: 'selector-id', begin: '\\#[a-zA-Z][a-zA-Z0-9_-]*'}
-        ]
+        begin: '\\#[a-zA-Z][a-zA-Z0-9_-]*' + LOOKAHEAD_TAG_END,
+        className: 'selector-id'
       },
 
       // tags
       {
-        begin: '\\b(' + TAGS.join('|') + ')' + TAG_END,
-        returnBegin: true,
-        contains: [
-          {className: 'selector-tag', begin: '\\b[a-zA-Z][a-zA-Z0-9_-]*'}
-        ]
+        begin: '\\b(' + TAGS.join('|') + ')' + LOOKAHEAD_TAG_END,
+        className: 'selector-tag'
       },
 
       // psuedo selectors
       {
-        begin: '&?:?:\\b(' + PSEUDO_SELECTORS.join('|') + ')' + TAG_END
+        begin: '&?:?:\\b(' + PSEUDO_SELECTORS.join('|') + ')' + LOOKAHEAD_TAG_END
       },
 
       // @ keywords
diff --git a/src/languages/xml.js b/src/languages/xml.js
index 184da67da4..1f16442735 100644
--- a/src/languages/xml.js
+++ b/src/languages/xml.js
@@ -79,7 +79,7 @@ function(hljs) {
         ending braket. The '$' is needed for the lexeme to be recognized
         by hljs.subMode() that tests lexemes outside the stream.
         */
-        begin: '<style(?=\\s|>|$)', end: '>',
+        begin: '<style(?=\\s|>)', end: '>',
         keywords: {name: 'style'},
         contains: [TAG_INTERNALS],
         starts: {
@@ -90,7 +90,7 @@ function(hljs) {
       {
         className: 'tag',
         // See the comment in the <style tag about the lookahead pattern
-        begin: '<script(?=\\s|>|$)', end: '>',
+        begin: '<script(?=\\s|>)', end: '>',
         keywords: {name: 'script'},
         contains: [TAG_INTERNALS],
         starts: {
diff --git a/src/languages/yaml.js b/src/languages/yaml.js
index 76a56562fb..deeb4ae30a 100644
--- a/src/languages/yaml.js
+++ b/src/languages/yaml.js
@@ -17,7 +17,6 @@ function(hljs) {
   var KEY = {
     className: 'attr',
     variants: [
-      // TODO: remove |$ hack when we have proper look-ahead support
       { begin: '\\w[\\w :\\/.-]*:(?=[ \t]|$)' },
       { begin: '"\\w[\\w :\\/.-]*":(?=[ \t]|$)' }, //double quoted keys
       { begin: '\'\\w[\\w :\\/.-]*\':(?=[ \t]|$)' } //single quoted keys
diff --git a/test/detect/stylus/default.txt b/test/detect/stylus/default.txt
index bb1763fdfe..896578549b 100644
--- a/test/detect/stylus/default.txt
+++ b/test/detect/stylus/default.txt
@@ -18,6 +18,9 @@ buttonBG($color = green)
 button
   buttonBG(red)
 
+.blue-button
+  buttonBG(blue)
+
 #content, .content
   font Tahoma, Chunkfive, sans-serif
   background url('hatch.png')
diff --git a/test/markup/abnf/default.expect.txt b/test/markup/abnf/default.expect.txt
new file mode 100644
index 0000000000..6f11816349
--- /dev/null
+++ b/test/markup/abnf/default.expect.txt
@@ -0,0 +1,22 @@
+<span class="hljs-comment">; line comment</span>
+
+<span class="hljs-attribute">ruleset</span>     =   [optional] *(group1 / group2 / <span class="hljs-keyword">SP</span>) <span class="hljs-keyword">CRLF</span> <span class="hljs-comment">; trailing comment</span>
+
+<span class="hljs-attribute">group1</span>      =   alt1
+<span class="hljs-attribute">group1</span>      =/  alt2
+
+<span class="hljs-attribute">alt1</span>        =   <span class="hljs-symbol">%x41-4D</span> / <span class="hljs-symbol">%d78-90</span>
+
+<span class="hljs-attribute">alt2</span>        =   <span class="hljs-symbol">%b00100001</span>
+
+<span class="hljs-attribute">group2</span>      =   *<span class="hljs-number">1</span><span class="hljs-keyword">DIGIT</span> / <span class="hljs-number">2</span>*<span class="hljs-keyword">HEXDIG</span> / <span class="hljs-number">3</span>*<span class="hljs-number">4</span><span class="hljs-keyword">OCTET</span>
+a
+<span class="hljs-attribute">optional</span>    =   hex-codes
+                / literal
+                / sensitive
+                / insensitive
+
+<span class="hljs-attribute">hex-codes</span>   =   <span class="hljs-symbol">%x68.65.6C.6C.6F</span>
+<span class="hljs-attribute">literal</span>     =   <span class="hljs-string">"string literal"</span>
+<span class="hljs-attribute">sensitive</span>   =   <span class="hljs-symbol">%s</span><span class="hljs-string">"case-sensitive string"</span>
+<span class="hljs-attribute">insensitive</span> =   <span class="hljs-symbol">%i</span><span class="hljs-string">"case-insensitive string"</span>
diff --git a/test/markup/abnf/default.txt b/test/markup/abnf/default.txt
new file mode 100644
index 0000000000..5a8b897899
--- /dev/null
+++ b/test/markup/abnf/default.txt
@@ -0,0 +1,22 @@
+; line comment
+
+ruleset     =   [optional] *(group1 / group2 / SP) CRLF ; trailing comment
+
+group1      =   alt1
+group1      =/  alt2
+
+alt1        =   %x41-4D / %d78-90
+
+alt2        =   %b00100001
+
+group2      =   *1DIGIT / 2*HEXDIG / 3*4OCTET
+a
+optional    =   hex-codes
+                / literal
+                / sensitive
+                / insensitive
+
+hex-codes   =   %x68.65.6C.6C.6F
+literal     =   "string literal"
+sensitive   =   %s"case-sensitive string"
+insensitive =   %i"case-insensitive string"
diff --git a/test/markup/stata/built_ins.expect.txt b/test/markup/stata/built_ins.expect.txt
new file mode 100644
index 0000000000..c151c54fa3
--- /dev/null
+++ b/test/markup/stata/built_ins.expect.txt
@@ -0,0 +1,3 @@
+<span class="hljs-keyword">local</span> b1 = <span class="hljs-built_in">ln</span>(<span class="hljs-symbol">`or'</span>)
+<span class="hljs-keyword">generate</span> logit1 = <span class="hljs-built_in">log</span>( pgty1 / ( 1 - pgty1))
+<span class="hljs-keyword">generate</span> logit2 = <span class="hljs-built_in">log</span>( pgty2 / ( 1 - pgty2))
diff --git a/test/markup/stata/built_ins.txt b/test/markup/stata/built_ins.txt
new file mode 100644
index 0000000000..42e637597d
--- /dev/null
+++ b/test/markup/stata/built_ins.txt
@@ -0,0 +1,3 @@
+local b1 = ln(`or')
+generate logit1 = log( pgty1 / ( 1 - pgty1))
+generate logit2 = log( pgty2 / ( 1 - pgty2))