@@ -884,31 +884,137 @@ def test_named_unicode_escapes(self):
884884 self .checkPatternError (br'\N{LESS-THAN SIGN}' , r'bad escape \N' , 0 )
885885 self .checkPatternError (br'[\N{LESS-THAN SIGN}]' , r'bad escape \N' , 1 )
886886
887- def test_string_boundaries (self ):
887+ def test_word_boundaries (self ):
888888 # See http://bugs.python.org/issue10713
889- self .assertEqual (re .search (r"\b(abc)\b" , "abc" ).group (1 ),
890- "abc" )
889+ self .assertEqual (re .search (r"\b(abc)\b" , "abc" ).group (1 ), "abc" )
890+ self .assertEqual (re .search (r"\b(abc)\b" , "abc" , re .ASCII ).group (1 ), "abc" )
891+ self .assertEqual (re .search (br"\b(abc)\b" , b"abc" ).group (1 ), b"abc" )
892+ self .assertEqual (re .search (br"\b(abc)\b" , b"abc" , re .LOCALE ).group (1 ), b"abc" )
893+ self .assertEqual (re .search (r"\b(ьюя)\b" , "ьюя" ).group (1 ), "ьюя" )
894+ self .assertIsNone (re .search (r"\b(ьюя)\b" , "ьюя" , re .ASCII ))  # with re.ASCII, Cyrillic letters are not word characters
895+ # There's a word boundary between a word and a non-word.
896+ self .assertTrue (re .match (r".\b" , "a=" ))
897+ self .assertTrue (re .match (r".\b" , "a=" , re .ASCII ))
898+ self .assertTrue (re .match (br".\b" , b"a=" ))
899+ self .assertTrue (re .match (br".\b" , b"a=" , re .LOCALE ))
900+ self .assertTrue (re .match (r".\b" , "я=" ))
901+ self .assertIsNone (re .match (r".\b" , "я=" , re .ASCII ))  # ASCII mode: both characters are non-word, so no boundary
902+ # There's a word boundary between a non-word and a word.
903+ self .assertTrue (re .match (r".\b" , "=a" ))
904+ self .assertTrue (re .match (r".\b" , "=a" , re .ASCII ))
905+ self .assertTrue (re .match (br".\b" , b"=a" ))
906+ self .assertTrue (re .match (br".\b" , b"=a" , re .LOCALE ))
907+ self .assertTrue (re .match (r".\b" , "=я" ))
908+ self .assertIsNone (re .match (r".\b" , "=я" , re .ASCII ))  # ASCII mode: both characters are non-word, so no boundary
909+ # There is no word boundary inside a word.
910+ self .assertIsNone (re .match (r".\b" , "ab" ))
911+ self .assertIsNone (re .match (r".\b" , "ab" , re .ASCII ))
912+ self .assertIsNone (re .match (br".\b" , b"ab" ))
913+ self .assertIsNone (re .match (br".\b" , b"ab" , re .LOCALE ))
914+ self .assertIsNone (re .match (r".\b" , "юя" ))
915+ self .assertIsNone (re .match (r".\b" , "юя" , re .ASCII ))
916+ # There is no word boundary between non-word characters.
917+ self .assertIsNone (re .match (r".\b" , "=-" ))
918+ self .assertIsNone (re .match (r".\b" , "=-" , re .ASCII ))
919+ self .assertIsNone (re .match (br".\b" , b"=-" ))
920+ self .assertIsNone (re .match (br".\b" , b"=-" , re .LOCALE ))
921+ # There is no non-boundary match between a word and a non-word.
922+ self .assertIsNone (re .match (r".\B" , "a=" ))
923+ self .assertIsNone (re .match (r".\B" , "a=" , re .ASCII ))
924+ self .assertIsNone (re .match (br".\B" , b"a=" ))
925+ self .assertIsNone (re .match (br".\B" , b"a=" , re .LOCALE ))
926+ self .assertIsNone (re .match (r".\B" , "я=" ))
927+ self .assertTrue (re .match (r".\B" , "я=" , re .ASCII ))  # ASCII mode: two non-word characters, hence a non-boundary
928+ # There is no non-boundary match between a non-word and a word.
929+ self .assertIsNone (re .match (r".\B" , "=a" ))
930+ self .assertIsNone (re .match (r".\B" , "=a" , re .ASCII ))
931+ self .assertIsNone (re .match (br".\B" , b"=a" ))
932+ self .assertIsNone (re .match (br".\B" , b"=a" , re .LOCALE ))
933+ self .assertIsNone (re .match (r".\B" , "=я" ))
934+ self .assertTrue (re .match (r".\B" , "=я" , re .ASCII ))  # ASCII mode: two non-word characters, hence a non-boundary
935+ # There's a non-boundary match inside a word.
936+ self .assertTrue (re .match (r".\B" , "ab" ))
937+ self .assertTrue (re .match (r".\B" , "ab" , re .ASCII ))
938+ self .assertTrue (re .match (br".\B" , b"ab" ))
939+ self .assertTrue (re .match (br".\B" , b"ab" , re .LOCALE ))
940+ self .assertTrue (re .match (r".\B" , "юя" ))
941+ self .assertTrue (re .match (r".\B" , "юя" , re .ASCII ))
942+ # There's a non-boundary match between non-word characters.
943+ self .assertTrue (re .match (r".\B" , "=-" ))
944+ self .assertTrue (re .match (r".\B" , "=-" , re .ASCII ))
945+ self .assertTrue (re .match (br".\B" , b"=-" ))
946+ self .assertTrue (re .match (br".\B" , b"=-" , re .LOCALE ))
891947 # There's a word boundary at the start of a string.
892948 self .assertTrue (re .match (r"\b" , "abc" ))
949+ self .assertTrue (re .match (r"\b" , "abc" , re .ASCII ))
950+ self .assertTrue (re .match (br"\b" , b"abc" ))
951+ self .assertTrue (re .match (br"\b" , b"abc" , re .LOCALE ))
952+ self .assertTrue (re .match (r"\b" , "ьюя" ))
953+ self .assertIsNone (re .match (r"\b" , "ьюя" , re .ASCII ))  # ASCII mode: the string starts with a non-word character
954+ # There's a word boundary at the end of a string.
955+ self .assertTrue (re .fullmatch (r".+\b" , "abc" ))
956+ self .assertTrue (re .fullmatch (r".+\b" , "abc" , re .ASCII ))
957+ self .assertTrue (re .fullmatch (br".+\b" , b"abc" ))
958+ self .assertTrue (re .fullmatch (br".+\b" , b"abc" , re .LOCALE ))
959+ self .assertTrue (re .fullmatch (r".+\b" , "ьюя" ))
960+ self .assertIsNone (re .search (r"\b" , "ьюя" , re .ASCII ))  # ASCII mode: no word characters, so no boundary anywhere
893961 # A non-empty string includes a non-boundary zero-length match.
894- self .assertTrue (re .search (r"\B" , "abc" ))
962+ self .assertEqual (re .search (r"\B" , "abc" ).span (), (1 , 1 ))
963+ self .assertEqual (re .search (r"\B" , "abc" , re .ASCII ).span (), (1 , 1 ))
964+ self .assertEqual (re .search (br"\B" , b"abc" ).span (), (1 , 1 ))
965+ self .assertEqual (re .search (br"\B" , b"abc" , re .LOCALE ).span (), (1 , 1 ))
966+ self .assertEqual (re .search (r"\B" , "ьюя" ).span (), (1 , 1 ))
967+ self .assertEqual (re .search (r"\B" , "ьюя" , re .ASCII ).span (), (0 , 0 ))  # ASCII mode: position 0 is already a non-boundary
895968 # There is no non-boundary match at the start of a string.
896- self .assertFalse (re .match (r"\B" , "abc" ))
969+ self .assertIsNone (re .match (r"\B" , "abc" ))
970+ self .assertIsNone (re .match (r"\B" , "abc" , re .ASCII ))
971+ self .assertIsNone (re .match (br"\B" , b"abc" ))
972+ self .assertIsNone (re .match (br"\B" , b"abc" , re .LOCALE ))
973+ self .assertIsNone (re .match (r"\B" , "ьюя" ))
974+ self .assertTrue (re .match (r"\B" , "ьюя" , re .ASCII ))  # ASCII mode: the string starts with a non-word character
975+ # There is no non-boundary match at the end of a string.
976+ self .assertIsNone (re .fullmatch (r".+\B" , "abc" ))
977+ self .assertIsNone (re .fullmatch (r".+\B" , "abc" , re .ASCII ))
978+ self .assertIsNone (re .fullmatch (br".+\B" , b"abc" ))
979+ self .assertIsNone (re .fullmatch (br".+\B" , b"abc" , re .LOCALE ))
980+ self .assertIsNone (re .fullmatch (r".+\B" , "ьюя" ))
981+ self .assertTrue (re .fullmatch (r".+\B" , "ьюя" , re .ASCII ))  # ASCII mode: the string ends with a non-word character
897982 # However, an empty string contains no word boundaries, and also no
898983 # non-boundaries.
899- self .assertIsNone (re .search (r"\B" , "" ))
984+ self .assertIsNone (re .search (r"\b" , "" ))
985+ self .assertIsNone (re .search (r"\b" , "" , re .ASCII ))
986+ self .assertIsNone (re .search (br"\b" , b"" ))
987+ self .assertIsNone (re .search (br"\b" , b"" , re .LOCALE ))
900988 # This one is questionable and different from the perlre behaviour,
901989 # but describes current behavior.
902- self .assertIsNone (re .search (r"\b" , "" ))
990+ self .assertIsNone (re .search (r"\B" , "" ))
991+ self .assertIsNone (re .search (r"\B" , "" , re .ASCII ))
992+ self .assertIsNone (re .search (br"\B" , b"" ))
993+ self .assertIsNone (re .search (br"\B" , b"" , re .LOCALE ))
903994 # A single word-character string has two boundaries, but no
904995 # non-boundary gaps.
905996 self .assertEqual (len (re .findall (r"\b" , "a" )), 2 )
997+ self .assertEqual (len (re .findall (r"\b" , "a" , re .ASCII )), 2 )
998+ self .assertEqual (len (re .findall (br"\b" , b"a" )), 2 )
999+ self .assertEqual (len (re .findall (br"\b" , b"a" , re .LOCALE )), 2 )
9061000 self .assertEqual (len (re .findall (r"\B" , "a" )), 0 )
1001+ self .assertEqual (len (re .findall (r"\B" , "a" , re .ASCII )), 0 )
1002+ self .assertEqual (len (re .findall (br"\B" , b"a" )), 0 )
1003+ self .assertEqual (len (re .findall (br"\B" , b"a" , re .LOCALE )), 0 )
9071004 # If there are no words, there are no boundaries
9081005 self .assertEqual (len (re .findall (r"\b" , " " )), 0 )
1006+ self .assertEqual (len (re .findall (r"\b" , " " , re .ASCII )), 0 )
1007+ self .assertEqual (len (re .findall (br"\b" , b" " )), 0 )
1008+ self .assertEqual (len (re .findall (br"\b" , b" " , re .LOCALE )), 0 )
9091009 self .assertEqual (len (re .findall (r"\b" , " " )), 0 )
1010+ self .assertEqual (len (re .findall (r"\b" , " " , re .ASCII )), 0 )
1011+ self .assertEqual (len (re .findall (br"\b" , b" " )), 0 )
1012+ self .assertEqual (len (re .findall (br"\b" , b" " , re .LOCALE )), 0 )
9101013 # Can match around the whitespace.
9111014 self .assertEqual (len (re .findall (r"\B" , " " )), 2 )
1015+ self .assertEqual (len (re .findall (r"\B" , " " , re .ASCII )), 2 )
1016+ self .assertEqual (len (re .findall (br"\B" , b" " )), 2 )
1017+ self .assertEqual (len (re .findall (br"\B" , b" " , re .LOCALE )), 2 )
9121018
9131019 def test_bigcharset (self ):
9141020 self .assertEqual (re .match ("([\u2222 \u2223 ])" ,
0 commit comments