@@ -3256,7 +3256,11 @@ def test_code_page_name(self):
32563256 codecs .code_page_decode , self .CP_UTF8 , b'\xff ' , 'strict' , True )
32573257
32583258 def check_decode (self , cp , tests ):
3259- for raw , errors , expected in tests :
3259+ for raw , errors , expected , * rest in tests :
3260+ if rest :
3261+ altexpected , = rest
3262+ else :
3263+ altexpected = expected
32603264 if expected is not None :
32613265 try :
32623266 decoded = codecs .code_page_decode (cp , raw , errors , True )
@@ -3273,8 +3277,21 @@ def check_decode(self, cp, tests):
32733277 self .assertRaises (UnicodeDecodeError ,
32743278 codecs .code_page_decode , cp , raw , errors , True )
32753279
3280+ if altexpected is not None :
3281+ decoded = raw .decode (f'cp{ cp } ' , errors )
3282+ self .assertEqual (decoded , altexpected ,
3283+ '%a.decode("cp%s", %r)=%a != %a'
3284+ % (raw , cp , errors , decoded , altexpected ))
3285+ else :
3286+ self .assertRaises (UnicodeDecodeError ,
3287+ raw .decode , f'cp{ cp } ' , errors )
3288+
32763289 def check_encode (self , cp , tests ):
3277- for text , errors , expected in tests :
3290+ for text , errors , expected , * rest in tests :
3291+ if rest :
3292+ altexpected , = rest
3293+ else :
3294+ altexpected = expected
32783295 if expected is not None :
32793296 try :
32803297 encoded = codecs .code_page_encode (cp , text , errors )
@@ -3285,18 +3302,26 @@ def check_encode(self, cp, tests):
32853302 '%a.encode("cp%s", %r)=%a != %a'
32863303 % (text , cp , errors , encoded [0 ], expected ))
32873304 self .assertEqual (encoded [1 ], len (text ))
3305+
3306+ encoded = text .encode (f'cp{ cp } ' , errors )
3307+ self .assertEqual (encoded , altexpected ,
3308+ '%a.encode("cp%s", %r)=%a != %a'
3309+ % (text , cp , errors , encoded , altexpected ))
32883310 else :
32893311 self .assertRaises (UnicodeEncodeError ,
32903312 codecs .code_page_encode , cp , text , errors )
3313+ self .assertRaises (UnicodeEncodeError ,
3314+ text .encode , f'cp{ cp } ' , errors )
32913315
32923316 def test_cp932 (self ):
32933317 self .check_encode (932 , (
32943318 ('abc' , 'strict' , b'abc' ),
32953319 ('\uff44 \u9a3e ' , 'strict' , b'\x82 \x84 \xe9 \x80 ' ),
3320+ ('\uf8f3 ' , 'strict' , b'\xff ' ),
32963321 # test error handlers
32973322 ('\xff ' , 'strict' , None ),
32983323 ('[\xff ]' , 'ignore' , b'[]' ),
3299- ('[\xff ]' , 'replace' , b'[y]' ),
3324+ ('[\xff ]' , 'replace' , b'[y]' , b'[?]' ),
33003325 ('[\u20ac ]' , 'replace' , b'[?]' ),
33013326 ('[\xff ]' , 'backslashreplace' , b'[\\ xff]' ),
33023327 ('[\xff ]' , 'namereplace' ,
@@ -3310,12 +3335,12 @@ def test_cp932(self):
33103335 (b'abc' , 'strict' , 'abc' ),
33113336 (b'\x82 \x84 \xe9 \x80 ' , 'strict' , '\uff44 \u9a3e ' ),
33123337 # invalid bytes
3313- (b'[\xff ]' , 'strict' , None ),
3314- (b'[\xff ]' , 'ignore' , '[]' ),
3315- (b'[\xff ]' , 'replace' , '[\ufffd ]' ),
3316- (b'[\xff ]' , 'backslashreplace' , '[\\ xff]' ),
3317- (b'[\xff ]' , 'surrogateescape' , '[\udcff ]' ),
3318- (b'[\xff ]' , 'surrogatepass' , None ),
3338+ (b'[\xff ]' , 'strict' , None , '[ \uf8f3 ]' ),
3339+ (b'[\xff ]' , 'ignore' , '[]' , '[ \uf8f3 ]' ),
3340+ (b'[\xff ]' , 'replace' , '[\ufffd ]' , '[ \uf8f3 ]' ),
3341+ (b'[\xff ]' , 'backslashreplace' , '[\\ xff]' , '[ \uf8f3 ]' ),
3342+ (b'[\xff ]' , 'surrogateescape' , '[\udcff ]' , '[ \uf8f3 ]' ),
3343+ (b'[\xff ]' , 'surrogatepass' , None , '[ \uf8f3 ]' ),
33193344 (b'\x81 \x00 abc' , 'strict' , None ),
33203345 (b'\x81 \x00 abc' , 'ignore' , '\x00 abc' ),
33213346 (b'\x81 \x00 abc' , 'replace' , '\ufffd \x00 abc' ),
@@ -3330,7 +3355,7 @@ def test_cp1252(self):
33303355 # test error handlers
33313356 ('\u0141 ' , 'strict' , None ),
33323357 ('\u0141 ' , 'ignore' , b'' ),
3333- ('\u0141 ' , 'replace' , b'L' ),
3358+ ('\u0141 ' , 'replace' , b'L' , b'?' ),
33343359 ('\udc98 ' , 'surrogateescape' , b'\x98 ' ),
33353360 ('\udc98 ' , 'surrogatepass' , None ),
33363361 ))
@@ -3340,6 +3365,59 @@ def test_cp1252(self):
33403365 (b'\xff ' , 'strict' , '\xff ' ),
33413366 ))
33423367
3368+ def test_cp708 (self ):
3369+ self .check_encode (708 , (
3370+ ('abc2%' , 'strict' , b'abc2%' ),
3371+ ('\u060c \u0621 \u064a ' , 'strict' , b'\xac \xc1 \xea ' ),
3372+ ('\u2562 \xe7 \xa0 ' , 'strict' , b'\x86 \x87 \xff ' ),
3373+ ('\x9a \x9f ' , 'strict' , b'\x9a \x9f ' ),
3374+ ('\u256b ' , 'strict' , b'\xc0 ' ),
3375+ # test error handlers
3376+ ('[\u0662 ]' , 'strict' , None ),
3377+ ('[\u0662 ]' , 'ignore' , b'[]' ),
3378+ ('[\u0662 ]' , 'replace' , b'[?]' ),
3379+ ('\udca0 ' , 'surrogateescape' , b'\xa0 ' ),
3380+ ('\udca0 ' , 'surrogatepass' , None ),
3381+ ))
3382+ self .check_decode (708 , (
3383+ (b'abc2%' , 'strict' , 'abc2%' ),
3384+ (b'\xac \xc1 \xea ' , 'strict' , '\u060c \u0621 \u064a ' ),
3385+ (b'\x86 \x87 \xff ' , 'strict' , '\u2562 \xe7 \xa0 ' ),
3386+ (b'\x9a \x9f ' , 'strict' , '\x9a \x9f ' ),
3387+ (b'\xc0 ' , 'strict' , '\u256b ' ),
3388+ # test error handlers
3389+ (b'\xa0 ' , 'strict' , None ),
3390+ (b'[\xa0 ]' , 'ignore' , '[]' ),
3391+ (b'[\xa0 ]' , 'replace' , '[\ufffd ]' ),
3392+ (b'[\xa0 ]' , 'backslashreplace' , '[\\ xa0]' ),
3393+ (b'[\xa0 ]' , 'surrogateescape' , '[\udca0 ]' ),
3394+ (b'[\xa0 ]' , 'surrogatepass' , None ),
3395+ ))
3396+
3397+ def test_cp20106 (self ):
3398+ self .check_encode (20106 , (
3399+ ('abc' , 'strict' , b'abc' ),
3400+ ('\xa7 \xc4 \xdf ' , 'strict' , b'@[~' ),
3401+ # test error handlers
3402+ ('@' , 'strict' , None ),
3403+ ('@' , 'ignore' , b'' ),
3404+ ('@' , 'replace' , b'?' ),
3405+ ('\udcbf ' , 'surrogateescape' , b'\xbf ' ),
3406+ ('\udcbf ' , 'surrogatepass' , None ),
3407+ ))
3408+ self .check_decode (20106 , (
3409+ (b'abc' , 'strict' , 'abc' ),
3410+ (b'@[~' , 'strict' , '\xa7 \xc4 \xdf ' ),
3411+ (b'\xe1 \xfe ' , 'strict' , 'a\xdf ' ),
3412+ # test error handlers
3413+ (b'(\xbf )' , 'strict' , None ),
3414+ (b'(\xbf )' , 'ignore' , '()' ),
3415+ (b'(\xbf )' , 'replace' , '(\ufffd )' ),
3416+ (b'(\xbf )' , 'backslashreplace' , '(\\ xbf)' ),
3417+ (b'(\xbf )' , 'surrogateescape' , '(\udcbf )' ),
3418+ (b'(\xbf )' , 'surrogatepass' , None ),
3419+ ))
3420+
33433421 def test_cp_utf7 (self ):
33443422 cp = 65000
33453423 self .check_encode (cp , (
@@ -3412,17 +3490,15 @@ def test_incremental(self):
34123490 False )
34133491 self .assertEqual (decoded , ('abc' , 3 ))
34143492
3415- def test_mbcs_alias (self ):
3416- # Check that looking up our 'default' codepage will return
3417- # mbcs when we don't have a more specific one available
3418- code_page = 99_999
3419- name = f'cp{ code_page } '
3420- with mock .patch ('_winapi.GetACP' , return_value = code_page ):
3421- try :
3422- codec = codecs .lookup (name )
3423- self .assertEqual (codec .name , 'mbcs' )
3424- finally :
3425- codecs .unregister (name )
3493+ def test_mbcs_code_page (self ):
3494+ # Check that codec for the current Windows (ANSII) code page is
3495+ # always available.
3496+ try :
3497+ from _winapi import GetACP
3498+ except ImportError :
3499+ self .skipTest ('requires _winapi.GetACP' )
3500+ cp = GetACP ()
3501+ codecs .lookup (f'cp{ cp } ' )
34263502
34273503 @support .bigmemtest (size = 2 ** 31 , memuse = 7 , dry_run = False )
34283504 def test_large_input (self , size ):
0 commit comments