@@ -51,7 +51,26 @@ extension DSLTree.Node {
5151 }
5252}
5353
extension DSLTree._AST.Atom {
  /// The single-scalar ASCII value of the wrapped AST atom, if it has one.
  ///
  /// This simply forwards to `ast.singleScalarASCIIValue`.
  var singleScalarASCIIValue: UInt8? {
    ast.singleScalarASCIIValue
  }
}
59+
5460extension DSLTree . Atom {
/// The ASCII byte for this atom when it is a single ASCII character or
/// scalar; `nil` otherwise.
///
/// - `.char`: the character's `asciiValue`, except that the CR-LF
///   character is always rejected.
/// - `.scalar`: the scalar's value when it is ASCII.
/// - `.unconverted`: whatever the wrapped atom reports.
/// - Any other atom: `nil`.
var singleScalarASCIIValue: UInt8? {
  if case let .char(character) = self {
    // "\r\n" is one Character made of two scalars, so it is never treated
    // as a single-scalar ASCII value.
    return character == "\r\n" ? nil : character.asciiValue
  }
  if case let .scalar(scalar) = self {
    return scalar.isASCII ? UInt8(ascii: scalar) : nil
  }
  if case let .unconverted(wrapped) = self {
    return wrapped.singleScalarASCIIValue
  }
  return nil
}
73+
5574 // TODO: If ByteCodeGen switches first, then this is unnecessary for
5675 // top-level nodes, but it's also invoked for `.atom` members of a custom CC
5776 func generateConsumer(
@@ -61,17 +80,32 @@ extension DSLTree.Atom {
6180
6281 switch self {
6382 case let . char( c) :
64- // TODO: Match level?
65- return { input, bounds in
66- let low = bounds. lowerBound
67- if isCaseInsensitive && c. isCased {
68- return input [ low] . lowercased ( ) == c. lowercased ( )
69- ? input. index ( after: low)
70- : nil
71- } else {
72- return input [ low] == c
73- ? input. index ( after: low)
74- : nil
83+ if opts. semanticLevel == . graphemeCluster {
84+ return { input, bounds in
85+ let low = bounds. lowerBound
86+ if isCaseInsensitive && c. isCased {
87+ return input [ low] . lowercased ( ) == c. lowercased ( )
88+ ? input. index ( after: low)
89+ : nil
90+ } else {
91+ return input [ low] == c
92+ ? input. index ( after: low)
93+ : nil
94+ }
95+ }
96+ } else {
97+ let consumers = c. unicodeScalars. map { s in consumeScalar {
98+ isCaseInsensitive
99+ ? $0. properties. lowercaseMapping == s. properties. lowercaseMapping
100+ : $0 == s
101+ } }
102+ return { input, bounds in
103+ for fn in consumers {
104+ if let idx = fn ( input, bounds) {
105+ return idx
106+ }
107+ }
108+ return nil
75109 }
76110 }
77111 case let . scalar( s) :
@@ -177,7 +211,18 @@ extension AST.Atom {
177211 default : return nil
178212 }
179213 }
180-
214+
/// The ASCII byte for this atom when its kind is a single ASCII character
/// or scalar; `nil` for every other kind.
///
/// The CR-LF character is rejected even though it is a single `Character`,
/// because it is made of two scalars.
var singleScalarASCIIValue: UInt8? {
  let atomKind = kind
  if case let .char(character) = atomKind, character != "\r\n" {
    return character.asciiValue
  }
  if case let .scalar(scalar) = atomKind, scalar.value.isASCII {
    return UInt8(ascii: scalar.value)
  }
  return nil
}
225+
181226 func generateConsumer(
182227 _ opts: MatchingOptions
183228 ) throws -> MEProgram . ConsumeFunction ? {
@@ -235,6 +280,34 @@ extension AST.Atom {
235280}
236281
237282extension DSLTree . CustomCharacterClass . Member {
/// Builds an `AsciiBitset` for this member, if the member can be expressed
/// purely in terms of single-scalar ASCII values.
///
/// - Parameters:
///   - opts: Matching options; only `isCaseInsensitive` is read here.
///   - isInverted: Inversion flag passed through to the bitset initializer.
/// - Returns: A bitset for an `.atom` with an ASCII value, or for a
///   `.range` whose two bounds both have ASCII values; `nil` for every
///   other member, or when a required ASCII value is unavailable.
func asAsciiBitset(
  _ opts: MatchingOptions,
  _ isInverted: Bool
) -> DSLTree.CustomCharacterClass.AsciiBitset? {
  if case let .atom(atom) = self {
    guard let value = atom.singleScalarASCIIValue else {
      return nil
    }
    return DSLTree.CustomCharacterClass.AsciiBitset(
      value,
      isInverted,
      opts.isCaseInsensitive
    )
  }

  if case let .range(lowerBound, upperBound) = self {
    // Both ends of the range must be ASCII for the bitset form to apply.
    guard
      let lowerValue = lowerBound.singleScalarASCIIValue,
      let upperValue = upperBound.singleScalarASCIIValue
    else {
      return nil
    }
    return DSLTree.CustomCharacterClass.AsciiBitset(
      low: lowerValue,
      high: upperValue,
      isInverted: isInverted,
      isCaseInsensitive: opts.isCaseInsensitive
    )
  }

  return nil
}
310+
238311 func generateConsumer(
239312 _ opts: MatchingOptions
240313 ) throws -> MEProgram . ConsumeFunction {
@@ -342,6 +415,19 @@ extension DSLTree.CustomCharacterClass.Member {
342415}
343416
344417extension DSLTree . CustomCharacterClass {
/// Combines the ASCII bitsets of all members into a single bitset.
///
/// Starts from a bitset created with this class's `isInverted` flag and
/// unions in each member's bitset in order.
///
/// - Parameter opts: Matching options forwarded to each member.
/// - Returns: The combined bitset, or `nil` as soon as any member cannot
///   be represented as an ASCII bitset.
func asAsciiBitset(_ opts: MatchingOptions) -> AsciiBitset? {
  var combined: AsciiBitset? = AsciiBitset(isInverted: isInverted)
  for member in members {
    // A single non-representable member makes the whole class ineligible.
    guard let memberBitset = member.asAsciiBitset(opts, isInverted) else {
      return nil
    }
    combined = combined?.union(memberBitset)
  }
  return combined
}
430+
345431 func generateConsumer(
346432 _ opts: MatchingOptions
347433 ) throws -> MEProgram . ConsumeFunction {
0 commit comments