@@ -342,8 +342,8 @@ extension Source {
342342    } . value
343343  } 
344344
345-   /// Eat  a scalar off the front, starting from after the
346-   /// backslash and  base character (e.g. `\u` or `\x`).
345+   /// Try to eat a scalar off the front, starting from just after the backslash,
346+   /// i.e. at the base character (e.g. the `u` of `\u` or the `x` of `\x`).
347347  ///
348348  ///     UniScalar -> 'u{' UniScalarSequence '}'
349349  ///                | 'u'  HexDigit{4}
@@ -353,60 +353,60 @@ extension Source {
353353  ///                | 'o{' OctalDigit{1...} '}'
354354  ///                | '0' OctalDigit{0...3}
355355  ///
356-   mutating  func  expectUnicodeScalar( 
357-     escapedCharacter base:  Character 
358-   )  throws  ->  AST . Atom . Kind  { 
356+   mutating  func  lexUnicodeScalar( )  throws  ->  AST . Atom . Kind ?   { 
359357    try   recordLoc  {  src in 
358+       try   src. tryEating  {  src in 
360359
361-       func  nullScalar( )  ->  AST . Atom . Kind  { 
362-         let  pos  =  src. currentPosition
363-         return  . scalar( . init( UnicodeScalar ( 0 ) ,  SourceLocation ( pos ..<  pos) ) ) 
364-       } 
365- 
366-       // TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set.
367-       switch  base { 
368-       // Hex numbers.
369-       case  " u "  where  src. tryEat ( " { " ) : 
370-         return  try   src. expectUnicodeScalarSequence ( eating:  " } " ) 
371- 
372-       case  " x "  where  src. tryEat ( " { " ) : 
373-         let  str  =  try   src. lexUntil ( eating:  " } " ) 
374-         return  . scalar( try   Source . validateUnicodeScalar ( str,  . hex) ) 
375- 
376-       case  " x " : 
377-         // \x expects *up to* 2 digits.
378-         guard  let  digits =  src. tryEatLocatedPrefix ( maxLength:  2 ,  \. isHexDigit) 
379-         else  { 
380-           // In PCRE, \x without any valid hex digits is \u{0}.
381-           // TODO: This doesn't appear to be followed by ICU or Oniguruma, so
382-           // could be changed to throw an error if we had a parsing mode for
383-           // them.
384-           return  nullScalar ( ) 
360+         func  nullScalar( )  ->  AST . Atom . Kind  { 
361+           let  pos  =  src. currentPosition
362+           return  . scalar( . init( UnicodeScalar ( 0 ) ,  SourceLocation ( pos ..<  pos) ) ) 
385363        } 
386-         return  . scalar( try   Source . validateUnicodeScalar ( digits,  . hex) ) 
387364
388-       case  " u " : 
389-         return  . scalar( try   src. expectUnicodeScalar ( numDigits:  4 ) ) 
390-       case  " U " : 
391-         return  . scalar( try   src. expectUnicodeScalar ( numDigits:  8 ) ) 
365+         // TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set.
366+         switch  src. tryEat ( )  { 
367+         // Hex numbers.
368+         case  " u "  where  src. tryEat ( " { " ) : 
369+           return  try   src. expectUnicodeScalarSequence ( eating:  " } " ) 
370+ 
371+         case  " x "  where  src. tryEat ( " { " ) : 
372+           let  str  =  try   src. lexUntil ( eating:  " } " ) 
373+           return  . scalar( try   Source . validateUnicodeScalar ( str,  . hex) ) 
374+ 
375+         case  " x " : 
376+           // \x expects *up to* 2 digits.
377+           guard  let  digits =  src. tryEatLocatedPrefix ( maxLength:  2 ,  \. isHexDigit) 
378+           else  { 
379+             // In PCRE, \x without any valid hex digits is \u{0}.
380+             // TODO: This doesn't appear to be followed by ICU or Oniguruma, so
381+             // could be changed to throw an error if we had a parsing mode for
382+             // them.
383+             return  nullScalar ( ) 
384+           } 
385+           return  . scalar( try   Source . validateUnicodeScalar ( digits,  . hex) ) 
386+ 
387+         case  " u " : 
388+           return  . scalar( try   src. expectUnicodeScalar ( numDigits:  4 ) ) 
389+         case  " U " : 
390+           return  . scalar( try   src. expectUnicodeScalar ( numDigits:  8 ) ) 
391+ 
392+         // Octal numbers.
393+         case  " o "  where  src. tryEat ( " { " ) : 
394+           let  str  =  try   src. lexUntil ( eating:  " } " ) 
395+           return  . scalar( try   Source . validateUnicodeScalar ( str,  . octal) ) 
396+ 
397+         case  " 0 " : 
398+           // We can read *up to* 3 more octal digits.
399+           // FIXME: PCRE can only read up to 2 octal digits; if we get a strict
400+           // PCRE mode, we should limit it here.
401+           guard  let  digits =  src. tryEatLocatedPrefix ( maxLength:  3 ,  \. isOctalDigit) 
402+           else  { 
403+             return  nullScalar ( ) 
404+           } 
405+           return  . scalar( try   Source . validateUnicodeScalar ( digits,  . octal) ) 
392406
393-       // Octal numbers.
394-       case  " o "  where  src. tryEat ( " { " ) : 
395-         let  str  =  try   src. lexUntil ( eating:  " } " ) 
396-         return  . scalar( try   Source . validateUnicodeScalar ( str,  . octal) ) 
397- 
398-       case  " 0 " : 
399-         // We can read *up to* 3 more octal digits.
400-         // FIXME: PCRE can only read up to 2 octal digits, if we get a strict
401-         // PCRE mode, we should limit it here.
402-         guard  let  digits =  src. tryEatLocatedPrefix ( maxLength:  3 ,  \. isOctalDigit) 
403-         else  { 
404-           return  nullScalar ( ) 
407+         default : 
408+           return  nil 
405409        } 
406-         return  . scalar( try   Source . validateUnicodeScalar ( digits,  . octal) ) 
407- 
408-       default : 
409-         fatalError ( " Unexpected scalar start " ) 
410410      } 
411411    } . value
412412  } 
@@ -802,6 +802,11 @@ extension Source {
802802  mutating  func  lexMatchingOptionSequence( 
803803    context:  ParsingContext 
804804  )  throws  ->  AST . MatchingOptionSequence ?   { 
805+     // PCRE accepts '(?)'
806+     // TODO: This is a no-op, should we warn?
807+     if  peek ( )  ==  " ) "  { 
808+       return  . init( caretLoc:  nil ,  adding:  [ ] ,  minusLoc:  nil ,  removing:  [ ] ) 
809+     } 
805810    let  ateCaret  =  recordLoc  {  $0. tryEat ( " ^ " )  } 
806811
807812    // TODO: Warn on duplicate options, and options appearing in both adding
@@ -1707,6 +1712,11 @@ extension Source {
17071712        return  ref
17081713      } 
17091714
1715+       // Hexadecimal and octal unicode scalars.
1716+       if  let  scalar =  try   src. lexUnicodeScalar ( )  { 
1717+         return  scalar
1718+       } 
1719+ 
17101720      guard  let  char =  src. tryEat ( )  else  { 
17111721        throw  ParseError . expectedEscape
17121722      } 
@@ -1718,14 +1728,6 @@ extension Source {
17181728        return  . escaped( builtin) 
17191729      } 
17201730
1721-       switch  char { 
1722-       // Hexadecimal and octal unicode scalars.
1723-       case  " u " ,  " x " ,  " U " ,  " o " ,  " 0 " : 
1724-         return  try   src. expectUnicodeScalar ( escapedCharacter:  char) 
1725-       default : 
1726-         break 
1727-       } 
1728- 
17291731      // We only allow unknown escape sequences for non-letter non-number ASCII,
17301732      // and non-ASCII whitespace.
17311733      // TODO: Once we have fix-its, suggest a `0` prefix for octal `[\7]`.
0 commit comments