File tree: 5 files changed, +25 -8 lines changed
Sources/_RegexParser/Regex/Parse Expand file tree Collapse file tree 5 files changed +25
-8
lines changed Original file line number Diff line number Diff line change @@ -43,6 +43,7 @@ enum ParseError: Error, Hashable {
4343 case expectedEscape
4444 case invalidEscape( Character )
4545 case confusableCharacter( Character )
46+ case literalCharMustBeNFC( Character )
4647
4748 case quoteMayNotSpanMultipleLines
4849 case unsetExtendedSyntaxMayNotSpanMultipleLines
@@ -145,6 +146,8 @@ extension ParseError: CustomStringConvertible {
145146 return " invalid escape sequence ' \\ \( c) ' "
146147 case . confusableCharacter( let c) :
147148 return " ' \( c) ' is confusable for a metacharacter; use ' \\ u{...}' instead "
149+ case . literalCharMustBeNFC( let c) :
150+ return " ' \( c) ' may not have the expected scalars; specify them explicitly with ' \\ u{...}' instead "
148151 case . quoteMayNotSpanMultipleLines:
149152 return " quoted sequence may not span multiple lines in multi-line literal "
150153 case . unsetExtendedSyntaxMayNotSpanMultipleLines:
Original file line number Diff line number Diff line change @@ -2055,13 +2055,14 @@ extension Parser {
20552055 return . invalid
20562056 }
20572057
2058- guard let charLoc = p. tryEatWithLoc ( ) else {
2058+ guard let charWithLoc = p. tryEatWithLoc ( ) else {
20592059 // We check at the beginning of the function for `isEmpty`, so we should
20602060 // not be at the end of the input here.
20612061 p. unreachable ( " Unexpected end of input " )
20622062 return nil
20632063 }
2064- let char = charLoc. value
2064+ let char = charWithLoc. value
2065+ let charLoc = charWithLoc. location
20652066 switch char {
20662067 case " ) " , " | " :
20672068 if customCC {
@@ -2092,7 +2093,11 @@ extension Parser {
20922093 let scalars = char. unicodeScalars
20932094 if scalars. count > 1 && scalars. first!. isASCII && char != " \r \n " &&
20942095 !char. isLetter && !char. isNumber {
2095- p. error ( . confusableCharacter( char) , at: charLoc. location)
2096+ p. error ( . confusableCharacter( char) , at: charLoc)
2097+ }
2098+ // Reject unescaped non-NFC characters.
2099+ if !char. isNFC {
2100+ p. error ( . literalCharMustBeNFC( char) , at: charLoc)
20962101 }
20972102 break
20982103 }
Original file line number Diff line number Diff line change @@ -332,7 +332,7 @@ extension RegexTests {
332332 doesNotContain: [ . match, . consumeBy, . matchScalarUnchecked] )
333333 // quoted literal is not all ascii -> match scalar when possible, always do boundary checks
334334 expectProgram (
335- for: " aaa \u{301} " ,
335+ for: # "aaa\u{301}"# ,
336336 contains: [ . match, . matchScalar] ,
337337 doesNotContain: [ . consumeBy, . matchScalarUnchecked] )
338338 // scalar mode -> always emit match scalar without boundary checks
@@ -347,7 +347,7 @@ extension RegexTests {
347347 contains: [ . matchScalarUnchecked] ,
348348 doesNotContain: [ . match, . consumeBy, . matchScalar] )
349349 expectProgram (
350- for: " aaa \u{301} " ,
350+ for: # "aaa\u{301}"# ,
351351 semanticLevel: . unicodeScalar,
352352 contains: [ . matchScalarUnchecked] ,
353353 doesNotContain: [ . match, . consumeBy, . matchScalar] )
Original file line number Diff line number Diff line change @@ -226,7 +226,7 @@ extension RegexTests {
226226
227227 // MARK: Allowed combining characters
228228
229- firstMatchTest ( " e \u{301} " , input: " e \u{301} " , match: " e \u{301} " )
229+ firstMatchTest ( # "e\u{301}"# , input: " e \u{301} " , match: " e \u{301} " )
230230 firstMatchTest ( " 1 \u{358} " , input: " 1 \u{358} " , match: " 1 \u{358} " )
231231 firstMatchTest ( #"\ \#u{361}"# , input: " \u{361} " , match: " \u{361} " )
232232
@@ -774,7 +774,7 @@ extension RegexTests {
774774 firstMatchTest ( #"[\d]"# , input: " 1️⃣ " , match: " 1️⃣ " )
775775 firstMatchTest ( #"(?P)[\d]"# , input: " 1️⃣ " , match: nil )
776776 firstMatchTest ( " [0-2&&1-3] " , input: " 1️⃣ " , match: nil )
777- firstMatchTest ( " [1-2e \u{301} ] " , input: " 1️⃣ " , match: nil )
777+ firstMatchTest ( # "[1-2e\u{301}]"# , input: " 1️⃣ " , match: nil )
778778
779779 firstMatchTest ( #"[\u{3A9}-\u{3A9}]"# , input: " \u{3A9} " , match: " \u{3A9} " )
780780
Original file line number Diff line number Diff line change @@ -374,10 +374,19 @@ extension RegexTests {
374374
375375 // MARK: Allowed combining characters
376376
377- parseTest ( " e \u{301} " , " e \u{301} " )
378377 parseTest ( " 1 \u{358} " , " 1 \u{358} " )
379378 parseTest ( #"\ \#u{361}"# , " \u{361} " )
380379
380+ // We don't allow non-NFC literal characters.
381+ parseTest (
382+ " e \u{301} " , " e \u{301} " , throwsError: . literalCharMustBeNFC( " e \u{301} " )
383+ )
384+ parseTest ( " \u{E9} " , " e \u{301} " )
385+
386+ // It can't be escaped either; it must be written using `\u{...}`.
387+ parseTest (
388+ " \\ e \u{301} " , " e \u{301} " , throwsError: . invalidEscape( " e \u{301} " ) )
389+
381390 // MARK: Alternations
382391
383392 parseTest (
You can’t perform that action at this time.
0 commit comments