From 7996289f80751c7325821a23674e19540e0ed682 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Mon, 24 Jan 2022 23:43:31 -0800 Subject: [PATCH] Change quantifiers in the DSL from structure types to top-level functions. This allows for a lot more flexibility with overloading a quantifier based on the input `Match` type. The immediate benefit of this is getting rid of void and nested void types (see example below) and, as a result, eliminating the need for void-filtering within concatenation. A more important benefit is being able to get rid of nominal tuples and switch back to Swift tuples, as Swift tuples enable strongly typed named captures and eliminate the complexity that comes with nominal tuples. ----- Before: ```swift let r0 = OneOrMore(.digit) // => `.Match == Tuple2<Substring, [()]>` let r1 = Optionally(.digit) // => `.Match == Tuple2<Substring, ()?>` let r2 = OneOrMore(Repeat(Optionally(.digit))) // => `.Match == Tuple2<Substring, [[()?]]>` "123".match(r2) // => `RegexMatch<Tuple2<Substring, [[()?]]>>?` ``` After: ```swift let r0 = oneOrMore(.digit) // => `.Match == Substring` let r1 = optionally(.digit) // => `.Match == Substring` let r2 = oneOrMore(many(optionally(.digit))) // => `.Match == Substring` "123".match(r2) // => `RegexMatch<Substring>?` ``` ----- Before: ```swift /(?<number>\d+)/ // => `Regex<Tuple2<Substring, Substring>>` ``` After: ```swift /(?<number>\d+)/ // => `Regex<(Substring, number: Substring)>` ``` --- .../Participants/RegexParticipant.swift | 14 +- .../VariadicsGenerator.swift | 128 ++- .../_StringProcessing/RegexDSL/Builder.swift | 6 - .../RegexDSL/Concatenation.swift | 964 +++++++++++++++++- Sources/_StringProcessing/RegexDSL/Core.swift | 22 - Sources/_StringProcessing/RegexDSL/DSL.swift | 126 +-- Tests/RegexTests/AlgorithmsTests.swift | 8 +- Tests/RegexTests/RegexDSLTests.swift | 84 +- 8 files changed, 1200 insertions(+), 152 deletions(-) diff --git a/Sources/Exercises/Participants/RegexParticipant.swift b/Sources/Exercises/Participants/RegexParticipant.swift index a73f9ecd0..8ae06437d 100644 --- a/Sources/Exercises/Participants/RegexParticipant.swift +++ 
b/Sources/Exercises/Participants/RegexParticipant.swift @@ -80,16 +80,16 @@ private func graphemeBreakPropertyData( forLine line: String ) -> GraphemeBreakEntry? { line.match { - OneOrMore(.hexDigit).tryCapture(Unicode.Scalar.init(hex:)) - Optionally { + oneOrMore(.hexDigit).tryCapture(Unicode.Scalar.init(hex:)) + optionally { ".." - OneOrMore(.hexDigit).tryCapture(Unicode.Scalar.init(hex:)) + oneOrMore(.hexDigit).tryCapture(Unicode.Scalar.init(hex:)) } - OneOrMore(.whitespace) + oneOrMore(.whitespace) ";" - OneOrMore(.whitespace) - OneOrMore(.word).tryCapture(Unicode.GraphemeBreakProperty.init) - Repeat(.any) + oneOrMore(.whitespace) + oneOrMore(.word).tryCapture(Unicode.GraphemeBreakProperty.init) + many(.any) }.map { let (_, lower, upper, property) = $0.match.tuple return GraphemeBreakEntry(lower...(upper ?? lower), property) diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index 130e51c27..6e58bbf5e 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -89,7 +89,7 @@ struct StandardErrorStream: TextOutputStream { var standardError = StandardErrorStream() typealias Counter = Int64 -let patternProtocolName = "RegexProtocol" +let regexProtocolName = "RegexProtocol" let concatenationStructTypeBaseName = "Concatenate" let capturingGroupTypeBaseName = "CapturingGroup" let matchAssociatedTypeName = "Match" @@ -132,9 +132,10 @@ struct VariadicsGenerator: ParsableCommand { emitTupleStruct(arity: arity) } + print("Generating concatenation overloads...", to: &standardError) for (leftArity, rightArity) in Permutations(totalArity: maxArity) { print( - "Left arity: \(leftArity) Right arity: \(rightArity)", + " Left arity: \(leftArity) Right arity: \(rightArity)", to: &standardError) emitConcatenation(leftArity: leftArity, rightArity: rightArity) } @@ -144,7 +145,20 @@ struct VariadicsGenerator: ParsableCommand { } output("\n\n") - 
output("// END AUTO-GENERATED CONTENT") + + print("Generating quantifiers...", to: &standardError) + for arity in 0..: \(patternProtocolName)") + output("\n>: \(regexProtocolName)") output(" where ") output("R0.Match == ") if leftArity == 0 { @@ -343,7 +357,7 @@ struct VariadicsGenerator: ParsableCommand { ", C\($0)" } output(""" - , R0: \(patternProtocolName), R1: \(patternProtocolName)>( + , R0: \(regexProtocolName), R1: \(regexProtocolName)>( combining next: R1, into combined: R0 ) -> Regex< """) @@ -374,4 +388,106 @@ struct VariadicsGenerator: ParsableCommand { """) } + + enum QuantifierKind: String, CaseIterable { + case zeroOrOne = "optionally" + case zeroOrMore = "many" + case oneOrMore = "oneOrMore" + + var typeName: String { + switch self { + case .zeroOrOne: return "_ZeroOrOne" + case .zeroOrMore: return "_ZeroOrMore" + case .oneOrMore: return "_OneOrMore" + } + } + + var operatorName: String { + switch self { + case .zeroOrOne: return ".?" + case .zeroOrMore: return ".*" // `*` quantifies zero or more, as the removed `.*` -> `Repeat` overload did + case .oneOrMore: return ".+" // `+` quantifies one or more, as the removed `.+` -> `OneOrMore` overload did + } + } + + var astQuantifierAmount: String { + switch self { + case .zeroOrOne: return "zeroOrOne" + case .zeroOrMore: return "zeroOrMore" + case .oneOrMore: return "oneOrMore" + } + } + } + + func emitQuantifier(kind: QuantifierKind, arity: Int) { + assert(arity >= 0) + func genericParameters(withConstraints: Bool) -> String { + var result = "" + if arity > 0 { + result += "W" + result += (0.." + let componentConstraint: String = arity == 0 ? "" : + "where Component.Match == Tuple\(arity+1)" + let quantifiedCaptures: String = { + switch kind { + case .zeroOrOne: + return "\(capturesTupled)?" + case .zeroOrMore, .oneOrMore: + return "[\(capturesTupled)]" + } + }() + let matchType = arity == 0 ? 
baseMatchTypeName : "Tuple2<\(baseMatchTypeName), \(quantifiedCaptures)>" + output(""" + public struct \(kind.typeName)_\(arity)<\(genericParameters(withConstraints: true))>: \(regexProtocolName) \(componentConstraint) { + public typealias \(matchAssociatedTypeName) = \(matchType) + public let regex: Regex<\(matchAssociatedTypeName)> + public init(component: Component) { + self.regex = .init(node: .quantification(.\(kind.astQuantifierAmount), .eager, component.regex.root)) + } + } + + \(arity == 0 ? "@_disfavoredOverload" : "") + public func \(kind.rawValue)<\(genericParameters(withConstraints: true))>( + _ component: Component + ) -> \(kind.typeName)_\(arity)<\(genericParameters(withConstraints: false))> { + .init(component: component) + } + + \(arity == 0 ? "@_disfavoredOverload" : "") + public func \(kind.rawValue)<\(genericParameters(withConstraints: true))>( + @RegexBuilder _ component: () -> Component + ) -> \(kind.typeName)_\(arity)<\(genericParameters(withConstraints: false))> { + \(kind.rawValue)(component()) + } + + \(arity == 0 ? "@_disfavoredOverload" : "") + public postfix func \(kind.operatorName)<\(genericParameters(withConstraints: true))>( + _ component: Component + ) -> \(kind.typeName)_\(arity)<\(genericParameters(withConstraints: false))> { + \(kind.rawValue)(component) + } + + \(kind == .zeroOrOne ? 
+ """ + extension RegexBuilder { + public static func buildLimitedAvailability<\(genericParameters(withConstraints: true))>( + _ component: Component + ) -> \(kind.typeName)_\(arity)<\(genericParameters(withConstraints: false))> { + \(kind.rawValue)(component) + } + } + """ : "") + + """) + } } diff --git a/Sources/_StringProcessing/RegexDSL/Builder.swift b/Sources/_StringProcessing/RegexDSL/Builder.swift index 754dff219..ba28972b9 100644 --- a/Sources/_StringProcessing/RegexDSL/Builder.swift +++ b/Sources/_StringProcessing/RegexDSL/Builder.swift @@ -33,10 +33,4 @@ public enum RegexBuilder { public static func buildEither(second component: R) -> R { component } - - public static func buildLimitedAvailability( - _ component: R - ) -> Optionally { - .init(component) - } } diff --git a/Sources/_StringProcessing/RegexDSL/Concatenation.swift b/Sources/_StringProcessing/RegexDSL/Concatenation.swift index 575384d33..57d54a6ff 100644 --- a/Sources/_StringProcessing/RegexDSL/Concatenation.swift +++ b/Sources/_StringProcessing/RegexDSL/Concatenation.swift @@ -1410,4 +1410,966 @@ extension RegexBuilder { } -// END AUTO-GENERATED CONTENT \ No newline at end of file +public struct _ZeroOrOne_0: RegexProtocol { + public typealias Match = Substring + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + +@_disfavoredOverload +public func optionally( + _ component: Component +) -> _ZeroOrOne_0 { + .init(component: component) +} + +@_disfavoredOverload +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_0 { + optionally(component()) +} + +@_disfavoredOverload +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_0 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_0 { + optionally(component) + } +} +public struct _ZeroOrMore_0: 
RegexProtocol { + public typealias Match = Substring + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + +@_disfavoredOverload +public func many( + _ component: Component +) -> _ZeroOrMore_0 { + .init(component: component) +} + +@_disfavoredOverload +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_0 { + many(component()) +} + +@_disfavoredOverload +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_0 { + many(component) +} + + +public struct _OneOrMore_0: RegexProtocol { + public typealias Match = Substring + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + +@_disfavoredOverload +public func oneOrMore( + _ component: Component +) -> _OneOrMore_0 { + .init(component: component) +} + +@_disfavoredOverload +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_0 { + oneOrMore(component()) +} + +@_disfavoredOverload +public postfix func .*( + _ component: Component +) -> _OneOrMore_0 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_1: RegexProtocol where Component.Match == Tuple2 { + public typealias Match = Tuple2 + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_1 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_1 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_1 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_1 { + optionally(component) + } +} +public struct 
_ZeroOrMore_1: RegexProtocol where Component.Match == Tuple2 { + public typealias Match = Tuple2 + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + + +public func many( + _ component: Component +) -> _ZeroOrMore_1 { + .init(component: component) +} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_1 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_1 { + many(component) +} + + +public struct _OneOrMore_1: RegexProtocol where Component.Match == Tuple2 { + public typealias Match = Tuple2 + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_1 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_1 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_1 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_2: RegexProtocol where Component.Match == Tuple3 { + public typealias Match = Tuple2?> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_2 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_2 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_2 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_2 { + optionally(component) + } +} +public struct _ZeroOrMore_2: RegexProtocol where Component.Match == 
Tuple3 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + + +public func many( + _ component: Component +) -> _ZeroOrMore_2 { + .init(component: component) +} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_2 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_2 { + many(component) +} + + +public struct _OneOrMore_2: RegexProtocol where Component.Match == Tuple3 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_2 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_2 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_2 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_3: RegexProtocol where Component.Match == Tuple4 { + public typealias Match = Tuple2?> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_3 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_3 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_3 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_3 { + optionally(component) + } +} +public struct _ZeroOrMore_3: RegexProtocol where Component.Match == Tuple4 { + public typealias Match = Tuple2]> + 
public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + + +public func many( + _ component: Component +) -> _ZeroOrMore_3 { + .init(component: component) +} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_3 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_3 { + many(component) +} + + +public struct _OneOrMore_3: RegexProtocol where Component.Match == Tuple4 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_3 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_3 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_3 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_4: RegexProtocol where Component.Match == Tuple5 { + public typealias Match = Tuple2?> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_4 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_4 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_4 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_4 { + optionally(component) + } +} +public struct _ZeroOrMore_4: RegexProtocol where Component.Match == Tuple5 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: 
Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + + +public func many( + _ component: Component +) -> _ZeroOrMore_4 { + .init(component: component) +} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_4 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_4 { + many(component) +} + + +public struct _OneOrMore_4: RegexProtocol where Component.Match == Tuple5 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_4 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_4 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_4 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_5: RegexProtocol where Component.Match == Tuple6 { + public typealias Match = Tuple2?> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_5 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_5 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_5 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_5 { + optionally(component) + } +} +public struct _ZeroOrMore_5: RegexProtocol where Component.Match == Tuple6 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: 
.quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + + +public func many( + _ component: Component +) -> _ZeroOrMore_5 { + .init(component: component) +} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_5 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_5 { + many(component) +} + + +public struct _OneOrMore_5: RegexProtocol where Component.Match == Tuple6 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_5 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_5 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_5 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_6: RegexProtocol where Component.Match == Tuple7 { + public typealias Match = Tuple2?> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_6 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_6 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_6 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_6 { + optionally(component) + } +} +public struct _ZeroOrMore_6: RegexProtocol where Component.Match == Tuple7 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, 
component.regex.root)) + } +} + + +public func many( + _ component: Component +) -> _ZeroOrMore_6 { + .init(component: component) +} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_6 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_6 { + many(component) +} + + +public struct _OneOrMore_6: RegexProtocol where Component.Match == Tuple7 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_6 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_6 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_6 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_7: RegexProtocol where Component.Match == Tuple8 { + public typealias Match = Tuple2?> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_7 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_7 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_7 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_7 { + optionally(component) + } +} +public struct _ZeroOrMore_7: RegexProtocol where Component.Match == Tuple8 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + + +public func 
many( + _ component: Component +) -> _ZeroOrMore_7 { + .init(component: component) +} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_7 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_7 { + many(component) +} + + +public struct _OneOrMore_7: RegexProtocol where Component.Match == Tuple8 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_7 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_7 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_7 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_8: RegexProtocol where Component.Match == Tuple9 { + public typealias Match = Tuple2?> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_8 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_8 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_8 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_8 { + optionally(component) + } +} +public struct _ZeroOrMore_8: RegexProtocol where Component.Match == Tuple9 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + + +public func many( + _ component: Component +) -> 
_ZeroOrMore_8 { + .init(component: component) +} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_8 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_8 { + many(component) +} + + +public struct _OneOrMore_8: RegexProtocol where Component.Match == Tuple9 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_8 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_8 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_8 { + oneOrMore(component) +} + + +public struct _ZeroOrOne_9: RegexProtocol where Component.Match == Tuple10 { + public typealias Match = Tuple2?> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrOne, .eager, component.regex.root)) + } +} + + +public func optionally( + _ component: Component +) -> _ZeroOrOne_9 { + .init(component: component) +} + + +public func optionally( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrOne_9 { + optionally(component()) +} + + +public postfix func .?( + _ component: Component +) -> _ZeroOrOne_9 { + optionally(component) +} + +extension RegexBuilder { + public static func buildLimitedAvailability( + _ component: Component + ) -> _ZeroOrOne_9 { + optionally(component) + } +} +public struct _ZeroOrMore_9: RegexProtocol where Component.Match == Tuple10 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.zeroOrMore, .eager, component.regex.root)) + } +} + + +public func many( + _ component: Component +) -> _ZeroOrMore_9 { + .init(component: component) 
+} + + +public func many( + @RegexBuilder _ component: () -> Component +) -> _ZeroOrMore_9 { + many(component()) +} + + +public postfix func .+( + _ component: Component +) -> _ZeroOrMore_9 { + many(component) +} + + +public struct _OneOrMore_9: RegexProtocol where Component.Match == Tuple10 { + public typealias Match = Tuple2]> + public let regex: Regex + public init(component: Component) { + self.regex = .init(node: .quantification(.oneOrMore, .eager, component.regex.root)) + } +} + + +public func oneOrMore( + _ component: Component +) -> _OneOrMore_9 { + .init(component: component) +} + + +public func oneOrMore( + @RegexBuilder _ component: () -> Component +) -> _OneOrMore_9 { + oneOrMore(component()) +} + + +public postfix func .*( + _ component: Component +) -> _OneOrMore_9 { + oneOrMore(component) +} + + + + +// END AUTO-GENERATED CONTENT diff --git a/Sources/_StringProcessing/RegexDSL/Core.swift b/Sources/_StringProcessing/RegexDSL/Core.swift index bcdf96c5d..00630403c 100644 --- a/Sources/_StringProcessing/RegexDSL/Core.swift +++ b/Sources/_StringProcessing/RegexDSL/Core.swift @@ -27,28 +27,6 @@ public protocol RegexProtocol { var regex: Regex { get } } -/// A `RegexProtocol` that has a single component child. -/// -/// This protocol adds an init supporting static lookup for character classes -public protocol RegexProtocolWithComponent: RegexProtocol { - associatedtype Component: RegexProtocol - - // Label needed for disambiguation - init(component: Component) -} -extension RegexProtocolWithComponent -where Component == CharacterClass { - // This gives us static member lookup - public init(_ component: Component) { - self.init(component: component) - } -} -extension RegexProtocolWithComponent { - public init(_ component: Component) { - self.init(component: component) - } -} - /// A regular expression. 
public struct Regex: RegexProtocol { /// A program representation that caches any lowered representation for diff --git a/Sources/_StringProcessing/RegexDSL/DSL.swift b/Sources/_StringProcessing/RegexDSL/DSL.swift index d90af6db8..5bab7d5a8 100644 --- a/Sources/_StringProcessing/RegexDSL/DSL.swift +++ b/Sources/_StringProcessing/RegexDSL/DSL.swift @@ -44,93 +44,79 @@ extension CharacterClass: RegexProtocol { } } + // MARK: - Combinators -// TODO: We want variadic generics! -// Overloads are auto-generated in Concatenation.swift. -// -// public struct Concatenate: RegexContent { -// public let regex: Regex<(R...).filter { $0 != Void.self }> -// -// public init(_ components: R...) { -// regex = .init(ast: .concatenation([#splat(components...)])) +// MARK: Concatenation + +// Note: Concatenation overloads are currently gyb'd. + +// TODO: Variadic generics +// struct Concatenation +// where R0.Match == (W0, C0...), R1.Match == (W1, C1...) +// { +// typealias Match = (Substring, C0..., C1...) +// let regex: Regex +// init(_ first: R0, _ second: R1) { +// regex = .init(concat(r0, r1)) // } // } -// MARK: Repetition +// MARK: Quantification -/// A regular expression. -public struct OneOrMore: RegexProtocolWithComponent { - public typealias Match = Tuple2 +// Note: Quantifiers are currently gyb'd. - public let regex: Regex - - public init(component: Component) { - self.regex = .init(node: .quantification( - .oneOrMore, .eager, component.regex.root) - ) - } +// TODO: Variadic generics +// struct _OneOrMore +// where R.Match == (W, C...) 
+// { +// typealias Match = (Substring, [(C...)]) +// let regex: Regex +// init(_ component: Component) { +// regex = .init(oneOrMore(r0)) +// } +// } +// +// struct _OneOrMoreNonCapturing { +// typealias Match = Substring +// let regex: Regex +// init(_ component: Component) { +// regex = .init(oneOrMore(r0)) +// } +// } +// +// func oneOrMore( +// _ component: Component +// ) -> R { +// _OneOrMore(component) +// } +// +// @_disfavoredOverload +// func oneOrMore( +// _ component: Component +// ) -> R { +// _OneOrMoreNonCapturing(component) +// } - public init(@RegexBuilder _ content: () -> Component) { - self.init(content()) - } -} +postfix operator .? +postfix operator .* postfix operator .+ -public postfix func .+ ( - lhs: R -) -> OneOrMore { - .init(lhs) -} - -public struct Repeat< - Component: RegexProtocol ->: RegexProtocolWithComponent { - public typealias Match = Tuple2 - - public let regex: Regex - - public init(component: Component) { - self.regex = .init(node: .quantification( - .zeroOrMore, .eager, component.regex.root)) - } - - public init(@RegexBuilder _ content: () -> Component) { - self.init(content()) - } +// Overloads for quantifying over a character class. +public func zeroOrOne(_ cc: CharacterClass) -> _ZeroOrOne_0 { + .init(component: cc) } -postfix operator .* - -public postfix func .* ( - lhs: R -) -> Repeat { - .init(lhs) +public func many(_ cc: CharacterClass) -> _ZeroOrMore_0 { + .init(component: cc) } -public struct Optionally: RegexProtocolWithComponent { - public typealias Match = Tuple2 - - public let regex: Regex - - public init(component: Component) { - self.regex = .init(node: .quantification( - .zeroOrOne, .eager, component.regex.root)) - } - - public init(@RegexBuilder _ content: () -> Component) { - self.init(content()) - } +public func oneOrMore(_ cc: CharacterClass) -> _OneOrMore_0 { + .init(component: cc) } -postfix operator .? - -public postfix func .? 
( - lhs: R -) -> Optionally { - .init(lhs) -} +// MARK: Alternation // TODO: Support heterogeneous capture alternation. public struct Alternation< diff --git a/Tests/RegexTests/AlgorithmsTests.swift b/Tests/RegexTests/AlgorithmsTests.swift index 5a848a6e4..8041214b0 100644 --- a/Tests/RegexTests/AlgorithmsTests.swift +++ b/Tests/RegexTests/AlgorithmsTests.swift @@ -116,7 +116,7 @@ class RegexConsumerTests: XCTestCase { } func testMatches() { - let regex = Regex(OneOrMore(.digit).capture { 2 * Int($0)! }) + let regex = Regex(oneOrMore(.digit).capture { 2 * Int($0)! }) let str = "foo 160 bar 99 baz" XCTAssertEqual(str.matches(of: regex).map(\.result.1), [320, 198]) } @@ -133,7 +133,7 @@ class RegexConsumerTests: XCTestCase { XCTAssertEqual(input.replacing(regex, with: replace), result) } - let int = OneOrMore(.digit).capture { Int($0)! } + let int = oneOrMore(.digit).capture { Int($0)! } replaceTest( int, @@ -148,13 +148,13 @@ class RegexConsumerTests: XCTestCase { { match in "\(match.result.1 + match.result.2)" }) replaceTest( - OneOrMore { int; "," }, + oneOrMore { int; "," }, input: "3,5,8,0, 1,0,2,-5,x8,8,", result: "16 3-5x16", { match in "\(match.result.1.reduce(0, +))" }) replaceTest( - Regex { int; "x"; int; Optionally { "x"; int } }, + Regex { int; "x"; int; optionally { "x"; int } }, input: "2x3 5x4x3 6x0 1x2x3x4", result: "6 60 0 6x4", { match in "\(match.result.1 * match.result.2 * (match.result.3 ?? 1))" }) diff --git a/Tests/RegexTests/RegexDSLTests.swift b/Tests/RegexTests/RegexDSLTests.swift index 629e91240..90f087011 100644 --- a/Tests/RegexTests/RegexDSLTests.swift +++ b/Tests/RegexTests/RegexDSLTests.swift @@ -47,8 +47,8 @@ class RegexDSLTests: XCTestCase { func testCombinators() throws { let regex = Regex { "a".+ - OneOrMore(Character("b")).capture() // Substring - Repeat("c").capture() // Substring + oneOrMore(Character("b")).capture() // Substring + many("c").capture() // Substring CharacterClass.hexDigit.capture().* // [Substring] "e".? 
("t" | "k").capture() // Substring @@ -66,9 +66,9 @@ class RegexDSLTests: XCTestCase { func testNestedGroups() throws { let regex = Regex { "a".+ - OneOrMore { - OneOrMore("b").capture() - Repeat("c").capture() + oneOrMore { + oneOrMore("b").capture() + many("c").capture() "d".capture().* "e".? } @@ -85,6 +85,19 @@ class RegexDSLTests: XCTestCase { == Tuple3("b", "cccc", ["d", "d", "d"])) } + func testCapturelessQuantification() throws { + // This test is to make sure that a captureless quantification, when used + // straight out of the quantifier (without being wrapped in a builder), is + // able to produce a regex whose `Match` type does not contain any sort of + // void. + let regex = many(.digit) + // Assert the inferred capture type. + let _: Substring.Type = type(of: regex).Match.self + let input = "123123" + let match = try XCTUnwrap(input.match(regex)?.match) + XCTAssertTrue(match == input) + } + func testQuantificationWithTransformedCapture() throws { // This test is to make sure transformed capture type information is // correctly propagated from the DSL into the bytecode and that the engine @@ -104,18 +117,17 @@ class RegexDSLTests: XCTestCase { } let regex = Regex { "a".+ - OneOrMore(.whitespace) - Optionally { - OneOrMore(.digit).capture { Int($0)! } + oneOrMore(.whitespace) + optionally { + oneOrMore(.digit).capture { Int($0)! } } - Repeat { - OneOrMore(.whitespace) - OneOrMore(.word).capture { Word($0)! } + many { + oneOrMore(.whitespace) + oneOrMore(.word).capture { Word($0)! } } } // Assert the inferred capture type. - let _: Tuple3.Type - = type(of: regex).Match.self + let _: Tuple3.Type = type(of: regex).Match.self do { let input = "aaa 123 apple orange apple" let match = input.match(regex)?.match.tuple @@ -138,7 +150,7 @@ class RegexDSLTests: XCTestCase { let regex1 = Regex { "a".+ Regex { - OneOrMore("b").capture() + oneOrMore("b").capture() "e".? 
}.capture() } @@ -165,9 +177,9 @@ class RegexDSLTests: XCTestCase { = type(of: regex3).Match.self let regex4 = Regex { "a".+ - OneOrMore { - OneOrMore("b").capture() - Repeat("c").capture() + oneOrMore { + oneOrMore("b").capture() + many("c").capture() "d".capture().* "e".? }.capture() @@ -179,13 +191,13 @@ class RegexDSLTests: XCTestCase { func testUnicodeScalarPostProcessing() throws { let spaces = Regex { - Repeat { + many { CharacterClass.whitespace } } let unicodeScalar = Regex { - OneOrMore { + oneOrMore { CharacterClass.hexDigit } spaces @@ -193,7 +205,7 @@ class RegexDSLTests: XCTestCase { let unicodeData = Regex { unicodeScalar - Optionally { + optionally { ".." unicodeScalar } @@ -201,11 +213,11 @@ class RegexDSLTests: XCTestCase { ";" spaces - OneOrMore { + oneOrMore { CharacterClass.word }.capture() - Repeat { + many { CharacterClass.any } } @@ -226,16 +238,16 @@ class RegexDSLTests: XCTestCase { """ let regexWithCapture = Regex { - OneOrMore(CharacterClass.hexDigit).capture(Unicode.Scalar.init(hex:)) - Optionally { + oneOrMore(CharacterClass.hexDigit).capture(Unicode.Scalar.init(hex:)) + optionally { ".." - OneOrMore(CharacterClass.hexDigit).capture(Unicode.Scalar.init(hex:)) + oneOrMore(CharacterClass.hexDigit).capture(Unicode.Scalar.init(hex:)) } - OneOrMore(CharacterClass.whitespace) + oneOrMore(CharacterClass.whitespace) ";" - OneOrMore(CharacterClass.whitespace) - OneOrMore(CharacterClass.word).capture() - Repeat(CharacterClass.any) + oneOrMore(CharacterClass.whitespace) + oneOrMore(CharacterClass.word).capture() + many(CharacterClass.any) } // Regex<(Substring, Unicode.Scalar?, Unicode.Scalar??, Substring)> do { // Assert the inferred capture type. @@ -253,16 +265,16 @@ class RegexDSLTests: XCTestCase { } let regexWithTryCapture = Regex { - OneOrMore(CharacterClass.hexDigit).tryCapture(Unicode.Scalar.init(hex:)) - Optionally { + oneOrMore(CharacterClass.hexDigit).tryCapture(Unicode.Scalar.init(hex:)) + optionally { ".." 
- OneOrMore(CharacterClass.hexDigit).tryCapture(Unicode.Scalar.init(hex:)) + oneOrMore(CharacterClass.hexDigit).tryCapture(Unicode.Scalar.init(hex:)) } - OneOrMore(CharacterClass.whitespace) + oneOrMore(CharacterClass.whitespace) ";" - OneOrMore(CharacterClass.whitespace) - OneOrMore(CharacterClass.word).capture() - Repeat(CharacterClass.any) + oneOrMore(CharacterClass.whitespace) + oneOrMore(CharacterClass.word).capture() + many(CharacterClass.any) } // Regex<(Substring, Unicode.Scalar, Unicode.Scalar?, Substring)> do { // Assert the inferred capture type.