Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,7 @@ extension Compiler.ByteCodeGen {
if options.isCaseInsensitive && c.isCased {
// TODO: buildCaseInsensitiveMatch(c) or buildMatch(c, caseInsensitive: true)
builder.buildConsume { input, bounds in
let inputChar = input[bounds.lowerBound].lowercased()
let matchChar = c.lowercased()
return inputChar == matchChar
return input[bounds.lowerBound].caseFoldedEquals(c)
? input.index(after: bounds.lowerBound)
: nil
}
Expand Down Expand Up @@ -655,11 +653,13 @@ extension Compiler.ByteCodeGen {
if options.isCaseInsensitive {
// TODO: buildCaseInsensitiveMatchSequence(c) or alternative
builder.buildConsume { input, bounds in
// FIXME: This needs to iterate over the case-folded strings, not
// iterate and then case-fold as we go.
var iterator = s.makeIterator()
var currentIndex = bounds.lowerBound
while let ch = iterator.next() {
guard currentIndex < bounds.upperBound,
ch.lowercased() == input[currentIndex].lowercased()
ch.caseFoldedEquals(input[currentIndex])
else { return nil }
input.formIndex(after: &currentIndex)
}
Expand Down
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/ConsumerInterface.swift
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ extension DSLTree.Atom {
return { input, bounds in
let low = bounds.lowerBound
if isCaseInsensitive && c.isCased {
return input[low].lowercased() == c.lowercased()
return input[low].caseFoldedEquals(c)
? input.index(after: low)
: nil
} else {
Expand Down
18 changes: 18 additions & 0 deletions Sources/_StringProcessing/Unicode/CaseConversion.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,24 @@
//
//===----------------------------------------------------------------------===//

@_spi(_Unicode) import Swift

// TODO

extension Character {
  /// Whether this character and `c` are equal when case folded.
  ///
  /// Each Unicode scalar of both characters is mapped through
  /// `properties._caseFolded`, the folded pieces are concatenated into a
  /// `String`, and the two strings are compared with `==`.
  ///
  /// - Parameter c: The character to compare against.
  /// - Returns: `true` if the concatenated case-folded forms of `self` and
  ///   `c` compare equal as strings.
  func caseFoldedEquals(_ c: Character) -> Bool {
    guard #available(SwiftStdlib 5.7, *) else {
      fatalError("Case-folded comparison requires SwiftStdlib 5.7 or later")
    }
    // Build real strings rather than comparing the per-scalar folded pieces
    // element by element: iterating each piece separately never recombines
    // grapheme clusters across scalar boundaries, so "e\u{301}" would be seen
    // as ["e", "\u{301}"] and fail to match the precomposed "é". String
    // equality compares whole strings under canonical equivalence instead.
    let foldedSelf = unicodeScalars
      .map(\.properties._caseFolded)
      .joined(separator: "")
    let foldedOther = c.unicodeScalars
      .map(\.properties._caseFolded)
      .joined(separator: "")
    return foldedSelf == foldedOther
  }
}

extension UnicodeScalar {
  /// Reports whether this scalar and `s` have equal case-folded forms.
  ///
  /// The comparison is made between the `properties._caseFolded` values of
  /// the two scalars. Traps when SwiftStdlib 5.7 is unavailable.
  func caseFoldedEquals(_ s: UnicodeScalar) -> Bool {
    if #available(SwiftStdlib 5.7, *) {
      let foldedLHS = properties._caseFolded
      let foldedRHS = s.properties._caseFolded
      return foldedLHS == foldedRHS
    } else {
      fatalError()
    }
  }
}
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/_CharacterClassModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public struct _CharacterClassModel: Hashable {
switch self {
case .character(let c):
if options.isCaseInsensitive {
return c.lowercased() == character.lowercased()
return c.caseFoldedEquals(character)
} else {
return c == character
}
Expand Down
33 changes: 22 additions & 11 deletions Tests/RegexTests/UTS18Tests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -238,20 +238,31 @@ extension UTS18Tests {
expectFirstMatch("Dåb", regex(#"Dåb"#).ignoresCase(), "Dåb")
expectFirstMatch("dÅB", regex(#"Dåb"#).ignoresCase(), "dÅB")
expectFirstMatch("D\u{212B}B", regex(#"Dåb"#).ignoresCase(), "D\u{212B}B")
}

let sigmas = "σΣς"
expectFirstMatch(sigmas, regex(#"^σ+$"#).ignoresCase(), sigmas[...])
expectFirstMatch(sigmas, regex(#"^Σ+$"#).ignoresCase(), sigmas[...])
expectFirstMatch(sigmas, regex(#"^ς+$"#).ignoresCase(), sigmas[...])

func testSimpleLooseMatches_XFail() {
XCTExpectFailure("Need case folding support") {
let sigmas = "σΣς"
expectFirstMatch(sigmas, regex(#"σ+"#).ignoresCase(), sigmas[...])
expectFirstMatch(sigmas, regex(#"Σ+"#).ignoresCase(), sigmas[...])
expectFirstMatch(sigmas, regex(#"ς+"#).ignoresCase(), sigmas[...])

// TODO: Test German sharp S
// TODO: Test char classes, e.g. [\p{Block=Phonetic_Extensions} [A-E]]
// Custom character classes
for regexCh in sigmas {
for inputCh in sigmas {
expectFirstMatch(String(inputCh), regex("[\(regexCh)]").ignoresCase(), String(inputCh)[...])
if regexCh != inputCh {
XCTAssertFalse(String(inputCh).contains(regex("[\(regexCh)]")))
}
}
}

expectFirstMatch("Strauß", regex("ß").ignoresCase(), "ß")
XCTExpectFailure {
expectFirstMatch("Strauss", regex("ß").ignoresCase(), "ss")
}

// TODO: Test char classes, e.g. [\p{Block=Phonetic_Extensions} [A-E]]
// TODO: Document when full case folding applies
}

// RL1.6 Line Boundaries
//
// To meet this requirement, if an implementation provides for line-boundary
Expand Down