@@ -158,11 +158,12 @@ class ParserWarning(Warning):
158158 information
159159 <http://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_ on
160160 ``iterator`` and ``chunksize``.
161- compression : {'infer', 'gzip', 'bz2', None}, default 'infer'
162- For on-the-fly decompression of on-disk data. If 'infer', then use gzip or
163- bz2 if filepath_or_buffer is a string ending in '.gz' or '.bz2',
164- respectively, and no decompression otherwise. Set to None for no
165- decompression.
161+ compression : {'gzip', 'bz2', 'zip', 'infer', None}, default 'infer'
162+ For on-the-fly decompression of on-disk data. If 'infer', then use gzip,
163+ bz2 or zip if filepath_or_buffer is a string ending in '.gz', '.bz2' or
164+ '.zip', respectively, and no decompression otherwise. New in 0.18.1:
165+ support for ZIP compression. If using 'zip', the ZIP file must contain
166+ only one data file to be read in. Set to None for no decompression.
166167thousands : str, default None
167168 Thousands separator
168169decimal : str, default '.'
@@ -273,6 +274,8 @@ def _read(filepath_or_buffer, kwds):
273274 inferred_compression = 'gzip'
274275 elif filepath_or_buffer .endswith ('.bz2' ):
275276 inferred_compression = 'bz2'
277+ elif filepath_or_buffer .endswith ('.zip' ):
278+ inferred_compression = 'zip'
276279 else :
277280 inferred_compression = None
278281 else :
@@ -1397,6 +1400,25 @@ def _wrap_compressed(f, compression, encoding=None):
13971400 data = bz2 .decompress (f .read ())
13981401 f = StringIO (data )
13991402 return f
1403+ elif compression == 'zip' :
1404+ import zipfile
1405+ zip_file = zipfile .ZipFile (f )
1406+ zip_names = zip_file .namelist ()
1407+ print ('ZIPNAMES' + zip_names )
1408+
1409+ if len (zip_names ) == 1 :
1410+ file_name = zip_names .pop ()
1411+ f = zip_file .open (file_name )
1412+ return f
1413+
1414+ elif len (zip_names ) == 0 :
1415+ raise ValueError ('Corrupted or zero files found in compressed '
1416+ 'zip file %s' , zip_file .filename )
1417+
1418+ else :
1419+ raise ValueError ('Multiple files found in compressed '
1420+ 'zip file %s' , str (zip_names ))
1421+
14001422 else :
14011423 raise ValueError ('do not recognize compression method %s'
14021424 % compression )
0 commit comments