diff --git a/Sources/_RegexParser/Regex/Parse/Sema.swift b/Sources/_RegexParser/Regex/Parse/Sema.swift index 83c014d2a..c66ab9246 100644 --- a/Sources/_RegexParser/Regex/Parse/Sema.swift +++ b/Sources/_RegexParser/Regex/Parse/Sema.swift @@ -106,8 +106,13 @@ extension RegexValidator { case .byteSemantics: throw error(.unsupported("byte semantic mode"), at: loc) + case .unicodeScalarSemantics: + throw error(.unsupported("unicode scalar semantic mode"), at: loc) + + case .graphemeClusterSemantics: + throw error(.unsupported("grapheme semantic mode"), at: loc) + case .caseInsensitive, .possessiveByDefault, .reluctantByDefault, - .unicodeScalarSemantics, .graphemeClusterSemantics, .singleLine, .multiline, .namedCapturesOnly, .extended, .extraExtended, .asciiOnlyDigit, .asciiOnlyWord, .asciiOnlySpace, .asciiOnlyPOSIXProps: break diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift index 9e94a886a..3f6532d82 100644 --- a/Tests/RegexTests/CompileTests.swift +++ b/Tests/RegexTests/CompileTests.swift @@ -124,9 +124,13 @@ extension RegexTests { "(?im)(?s).", matchingOptions(adding: [.caseInsensitive, .multiline, .singleLine])) try expectInitialOptions(".", matchingOptions()) - try expectInitialOptions( - "(?im)(?s).(?u)", - matchingOptions(adding: [.caseInsensitive, .multiline, .singleLine])) + + // FIXME: Figure out (?X) and (?u) semantics + try XCTExpectFailure("Figure out (?X) and (?u) semantics") { + try expectInitialOptions( + "(?im)(?s).(?u)", + matchingOptions(adding: [.caseInsensitive, .multiline, .singleLine])) + } try expectInitialOptions( "(?i:.)", diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 8cecd26c8..98aa23ca8 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -1020,10 +1020,12 @@ extension RegexTests { #"\u{65}\y"#, // Grapheme boundary assertion ("Cafe\u{301}", nil), ("Sol Cafe", "e")) + + // FIXME: Figure out (?X) and (?u) semantics 
firstMatchTests( #"(?u)\u{65}\Y"#, // Grapheme non-boundary assertion ("Cafe\u{301}", "e"), - ("Sol Cafe", nil)) + ("Sol Cafe", nil), xfail: true) } func testMatchGroups() { @@ -1586,7 +1588,8 @@ extension RegexTests { // a single Unicode scalar value, leaving any other grapheme scalar // components to be matched. - firstMatchTest(#"(?u:.)"#, input: eDecomposed, match: "e") + // FIXME: Figure out (?X) and (?u) semantics + firstMatchTest(#"(?u:.)"#, input: eDecomposed, match: "e", xfail: true) matchTest( #".\u{301}"#, @@ -1597,18 +1600,30 @@ extension RegexTests { (eComposed, false), (eDecomposed, false)) + // FIXME: Figure out (?X) and (?u) semantics // FIXME: \O is unsupported - firstMatchTest(#"(?u)\O\u{301}"#, input: eDecomposed, match: eDecomposed) - firstMatchTest(#"(?u)e\O"#, input: eDecomposed, match: eDecomposed) + firstMatchTest( + #"(?u)\O\u{301}"#, + input: eDecomposed, + match: eDecomposed, + xfail: true + ) + firstMatchTest( + #"(?u)e\O"#, + input: eDecomposed, + match: eDecomposed, + xfail: true + ) firstMatchTest(#"\O"#, input: eComposed, match: eComposed) firstMatchTest(#"\O"#, input: eDecomposed, match: nil, xfail: true) + // FIXME: Figure out (?X) and (?u) semantics matchTest( #"(?u).\u{301}"#, (eComposed, false), - (eDecomposed, true)) - firstMatchTest(#"(?u).$"#, input: eComposed, match: eComposed) + (eDecomposed, true), xfail: true) + firstMatchTest(#"(?u).$"#, input: eComposed, match: eComposed, xfail: true) // Option permutations for 'u' and 's' matchTest( @@ -1621,14 +1636,16 @@ extension RegexTests { ("e\u{301}ab", false), ("e\u{301}abc", true), ("e\u{301}\nab", true)) + + // FIXME: Figure out (?X) and (?u) semantics matchTest( #"(?u)...."#, ("e\u{301}ab", true), - ("e\u{301}\na", false)) + ("e\u{301}\na", false), xfail: true) matchTest( #"(?us)...."#, ("e\u{301}ab", true), - ("e\u{301}\na", true)) + ("e\u{301}\na", true), xfail: true) } // TODO: Add test for implied grapheme cluster requirement at group boundaries diff --git 
a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 5beb67448..97f9ba908 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -1074,9 +1074,11 @@ extension RegexTests { .singleLine, .reluctantByDefault, .extraExtended, .extended, .unicodeWordBoundaries, .asciiOnlyDigit, .asciiOnlyPOSIXProps, .asciiOnlySpace, .asciiOnlyWord, .textSegmentGraphemeMode, - .textSegmentWordMode, .graphemeClusterSemantics, .unicodeScalarSemantics, + .textSegmentWordMode, + .graphemeClusterSemantics, .unicodeScalarSemantics, .byteSemantics ] + parseTest("(?iJmnsUxxxwDPSWy{g}y{w}Xub-iJmnsUxxxwDPSW)", changeMatchingOptions( matchingOptions(adding: allOptions, removing: allOptions.dropLast(5)) ), throwsError: .unsupported) @@ -2787,8 +2789,9 @@ extension RegexTests { diagnosticTest("(?-y{g})", .cannotRemoveTextSegmentOptions) diagnosticTest("(?-y{w})", .cannotRemoveTextSegmentOptions) - diagnosticTest("(?-X)", .cannotRemoveSemanticsOptions) - diagnosticTest("(?-u)", .cannotRemoveSemanticsOptions) + // FIXME: Reenable once we figure out (?X) and (?u) semantics + //diagnosticTest("(?-X)", .cannotRemoveSemanticsOptions) + //diagnosticTest("(?-u)", .cannotRemoveSemanticsOptions) diagnosticTest("(?-b)", .cannotRemoveSemanticsOptions) diagnosticTest("(?a)", .unknownGroupKind("?a")) diff --git a/Tests/RegexTests/UTS18Tests.swift b/Tests/RegexTests/UTS18Tests.swift index 7306632da..be01fecb3 100644 --- a/Tests/RegexTests/UTS18Tests.swift +++ b/Tests/RegexTests/UTS18Tests.swift @@ -62,7 +62,7 @@ fileprivate func expectFirstMatch( } #if os(Linux) -func XCTExpectFailure(_ message: String? = nil, body: () -> Void) {} +func XCTExpectFailure(_ message: String? 
= nil, body: () throws -> Void) rethrows {} #endif // MARK: - Basic Unicode Support: Level 1 @@ -466,7 +466,12 @@ extension UTS18Tests { // Matching semantic level XCTAssertFalse("👩‍👩‍👧‍👦".contains(regex(#".\N{ZERO WIDTH JOINER}"#))) - XCTAssertTrue("👩‍👩‍👧‍👦".contains(regex(#"(?u).\N{ZERO WIDTH JOINER}"#))) + + // FIXME: Figure out (?X) and (?u) semantics + XCTExpectFailure("Figure out (?X) and (?u) semantics") { + XCTFail(#"(?u).\N{ZERO WIDTH JOINER}"#) + //XCTAssertTrue("👩‍👩‍👧‍👦".contains(regex(#"(?u).\N{ZERO WIDTH JOINER}"#))) + } } func testIndividuallyNamedCharacters_XFail() {