@@ -63,6 +63,13 @@ def urlopen(*args, **kwargs):
6363_VALID_URLS = set (uses_relative + uses_netloc + uses_params )  # de-duplicated concatenation of the three sequences; presumably URL schemes -- confirm at the import site
6464_VALID_URLS .discard ('' )  # drop the empty-string entry if it is present
6565
66+ _compression_to_extension = {  # compression name -> file-name suffix; the URL branch of get_filepath_or_buffer tests url.endswith(suffix) when compression == 'infer'
67+ 'gzip' : '.gz' ,
68+ 'bz2' : '.bz2' ,
69+ 'zip' : '.zip' ,
70+ 'xz' : '.xz' ,
71+ }
72+
6673
6774class ParserError (ValueError ):
6875 """
@@ -234,20 +241,19 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
234241 -------
235242 a filepath_or_buffer, the encoding, the compression
236243 """
237-
244+
238245 if _is_url (filepath_or_buffer ):
239- req = _urlopen (str (filepath_or_buffer ))
246+ url = str (filepath_or_buffer )
247+ req = _urlopen (url )
240248 if compression == 'infer' :
241- content_encoding = req . headers . get ( 'Content-Encoding' , None )
242- if content_encoding == 'gzip' :
243- compression = 'gzip'
249+ for compression , extension in _compression_to_extension . items ():
250+ if url . endswith ( extension ) :
251+ break
244252 else :
245- compression = None
246- # cat on the compression to the tuple returned by the function
247- to_return = (list (maybe_read_encoded_stream (req , encoding ,
248- compression )) +
249- [compression ])
250- return tuple (to_return )
253+ content_encoding = req .headers .get ('Content-Encoding' , None )
254+ compression = 'gzip' if content_encoding == 'gzip' else None
255+ reader , encoding = maybe_read_encoded_stream (req , encoding , compression )
256+ return reader , encoding , compression
251257
252258 if _is_s3_url (filepath_or_buffer ):
253259 from pandas .io .s3 import get_filepath_or_buffer
0 commit comments