@@ -3457,11 +3457,12 @@ class ArchiveMaker:
34573457 with t.open() as tar:
34583458 ... # `tar` is now a TarFile with 'filename' in it!
34593459 """
3460- def __init__ (self ):
3460+ def __init__ (self , ** kwargs ):
34613461 self .bio = io .BytesIO ()
3462+ self .tar_kwargs = dict (kwargs )
34623463
34633464 def __enter__ (self ):
3464- self .tar_w = tarfile .TarFile (mode = 'w' , fileobj = self .bio )
3465+ self .tar_w = tarfile .TarFile (mode = 'w' , fileobj = self .bio , ** self . tar_kwargs )
34653466 return self
34663467
34673468 def __exit__ (self , * exc ):
@@ -4040,7 +4041,10 @@ def test_tar_filter(self):
40404041 # that in the test archive.)
40414042 with tarfile .TarFile .open (tarname ) as tar :
40424043 for tarinfo in tar .getmembers ():
4043- filtered = tarfile .tar_filter (tarinfo , '' )
4044+ try :
4045+ filtered = tarfile .tar_filter (tarinfo , '' )
4046+ except UnicodeEncodeError :
4047+ continue
40444048 self .assertIs (filtered .name , tarinfo .name )
40454049 self .assertIs (filtered .type , tarinfo .type )
40464050
@@ -4051,11 +4055,48 @@ def test_data_filter(self):
40514055 for tarinfo in tar .getmembers ():
40524056 try :
40534057 filtered = tarfile .data_filter (tarinfo , '' )
4054- except tarfile .FilterError :
4058+ except ( tarfile .FilterError , UnicodeEncodeError ) :
40554059 continue
40564060 self .assertIs (filtered .name , tarinfo .name )
40574061 self .assertIs (filtered .type , tarinfo .type )
40584062
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_filter_unencodable(self):
    """Check how tar_filter and data_filter treat unencodable names."""
    filters = (tarfile.tar_filter, tarfile.data_filter)

    # Sanity check using a valid path: the name is passed through as
    # the very same object.
    tarinfo = tarfile.TarInfo(os_helper.TESTFN)
    for filter_func in filters:
        filtered = filter_func(tarinfo, '')
        self.assertIs(filtered.name, tarinfo.name)

    # A name with an embedded null character is expected to be rejected
    # with ValueError by both filters.
    tarinfo = tarfile.TarInfo('test\x00')
    for filter_func in filters:
        self.assertRaises(ValueError, filter_func, tarinfo, '')

    # A lone surrogate is expected to surface as UnicodeEncodeError
    # from both filters.
    tarinfo = tarfile.TarInfo('\ud800')
    for filter_func in filters:
        self.assertRaises(UnicodeEncodeError, filter_func, tarinfo, '')
4078+
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_extract_unencodable(self):
    """Check extractall() when a member name cannot be encoded."""
    # Create a member with name \xed\xa0\x80 which is UTF-8 encoded
    # lone surrogate \ud800.  Writing with ascii/surrogateescape turns
    # each \udcXX code point back into the raw byte \xXX.
    with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc:
        arc.add('\udced\udca0\udc80')
    with os_helper.temp_cwd():
        # With errorlevel=1 the encoding failure must propagate to the
        # caller and nothing may be written to the directory.
        tar = arc.open(encoding='utf-8', errors='surrogatepass',
                       errorlevel=1)
        self.assertEqual(tar.getnames(), ['\ud800'])
        with self.assertRaises(UnicodeEncodeError):
            tar.extractall(filter=tarfile.tar_filter)
        self.assertEqual(os.listdir(), [])

        # With errorlevel=0 and debug=1 extraction must not raise; the
        # failure is reported on stderr and still nothing is written.
        tar = arc.open(encoding='utf-8', errors='surrogatepass',
                       errorlevel=0, debug=1)
        with support.captured_stderr() as stderr:
            tar.extractall(filter=tarfile.tar_filter)
        self.assertEqual(os.listdir(), [])
        self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue())
4099+
40594100 def test_default_filter_warns (self ):
40604101 """Ensure the default filter warns"""
40614102 with ArchiveMaker () as arc :
0 commit comments