diff --git a/Sources/_RegexParser/Regex/Parse/Parse.swift b/Sources/_RegexParser/Regex/Parse/Parse.swift index 112f32358..013bb1361 100644 --- a/Sources/_RegexParser/Regex/Parse/Parse.swift +++ b/Sources/_RegexParser/Regex/Parse/Parse.swift @@ -465,16 +465,6 @@ extension Parser { var members: Array = [] try parseCCCMembers(into: &members) - // If we didn't parse any semantic members, we can eat a ']' character, as - // PCRE, Oniguruma, and ICU forbid empty character classes, and assume an - // initial ']' is literal. - if members.none(\.isSemantic) { - if let loc = source.tryEatWithLoc("]") { - members.append(.atom(.init(.char("]"), loc))) - try parseCCCMembers(into: &members) - } - } - // If we have a binary set operator, parse it and the next members. Note // that this means we left associate for a chain of operators. // TODO: We may want to diagnose and require users to disambiguate, at least diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 36056e85a..89a645375 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -1282,7 +1282,6 @@ extension RegexTests { firstMatchTest(#"(?xx)[ \t]+"#, input: " \t \t", match: "\t") firstMatchTest("(?xx)[ a && ab ]+", input: " aaba ", match: "aa") - firstMatchTest("(?xx)[ ] a ]+", input: " a]]a ] ", match: "a]]a") } func testASCIIClasses() { diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index ed930b0fe..62eb2f6bd 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -528,23 +528,7 @@ extension RegexTests { )) parseTest("[-]", charClass("-")) - - // Empty character classes are forbidden, therefore these are character - // classes containing literal ']'. - parseTest("[]]", charClass("]")) - parseTest("[]a]", charClass("]", "a")) - parseTest("(?x)[ ]]", concat( - changeMatchingOptions(matchingOptions(adding: .extended)), - charClass("]") - )) - parseTest("(?x)[ ] ]", concat( - changeMatchingOptions(matchingOptions(adding: .extended)), - charClass("]") - )) - parseTest("(?x)[ ] a ]", concat( - changeMatchingOptions(matchingOptions(adding: .extended)), - charClass("]", "a") - )) + parseTest(#"[\]]"#, charClass("]")) // These are metacharacters in certain contexts, but normal characters // otherwise. @@ -2497,10 +2481,15 @@ extension RegexTests { diagnosticTest("[a", .expected("]")) - // The first ']' of a custom character class is literal, so these are - // missing the closing bracket. - diagnosticTest("[]", .expected("]")) - diagnosticTest("(?x)[ ]", .expected("]")) + // Character classes may not be empty. + diagnosticTest("[]", .expectedCustomCharacterClassMembers) + diagnosticTest("[]]", .expectedCustomCharacterClassMembers) + diagnosticTest("[]a]", .expectedCustomCharacterClassMembers) + diagnosticTest("(?x)[ ]", .expectedCustomCharacterClassMembers) + diagnosticTest("(?x)[ ] ]", .expectedCustomCharacterClassMembers) + diagnosticTest("(?x)[ ] a ]", .expectedCustomCharacterClassMembers) + diagnosticTest("(?xx)[ ] a ]+", .expectedCustomCharacterClassMembers) + diagnosticTest("(?x)[ ]]", .expectedCustomCharacterClassMembers) diagnosticTest("[&&]", .expectedCustomCharacterClassMembers) diagnosticTest("[a&&]", .expectedCustomCharacterClassMembers)