99import mmap
1010import os
1111import pathlib
12- from typing import IO , AnyStr , BinaryIO , Optional , TextIO , Type
12+ from typing import (
13+ IO ,
14+ Any ,
15+ AnyStr ,
16+ BinaryIO ,
17+ Dict ,
18+ List ,
19+ Optional ,
20+ TextIO ,
21+ Tuple ,
22+ Type ,
23+ Union ,
24+ )
1325from urllib .error import URLError # noqa
1426from urllib .parse import ( # noqa
1527 urlencode ,
@@ -255,6 +267,40 @@ def file_path_to_url(path: str) -> str:
255267_compression_to_extension = {"gzip" : ".gz" , "bz2" : ".bz2" , "zip" : ".zip" , "xz" : ".xz" }
256268
257269
270+ def _get_compression_method (
271+ compression : Optional [Union [str , Dict [str , str ]]]
272+ ) -> Tuple [Optional [str ], Dict [str , str ]]:
273+ """
274+ Simplifies a compression argument to a compression method string and
275+ a dict containing additional arguments.
276+
277+ Parameters
278+ ----------
279+ compression : str or dict
280+ If string, specifies the compression method. If dict, value at key
281+ 'method' specifies compression method.
282+
283+ Returns
284+ -------
285+ tuple of ({compression method}, Optional[str]
286+ {compression arguments}, Dict[str, str])
287+
288+ Raises
289+ ------
290+ ValueError on dict missing 'method' key
291+ """
292+ # Handle dict
293+ if isinstance (compression , dict ):
294+ compression_args = compression .copy ()
295+ try :
296+ compression = compression_args .pop ("method" )
297+ except KeyError :
298+ raise ValueError ("If dict, compression must have key 'method'" )
299+ else :
300+ compression_args = {}
301+ return compression , compression_args
302+
303+
258304def _infer_compression (
259305 filepath_or_buffer : FilePathOrBuffer , compression : Optional [str ]
260306) -> Optional [str ]:
@@ -266,21 +312,20 @@ def _infer_compression(
266312
267313 Parameters
268314 ----------
269- filepath_or_buffer :
270- a path (str) or buffer
315+ filepath_or_buffer : str or file handle
316+ File path or object.
271317 compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
272318 If 'infer' and `filepath_or_buffer` is path-like, then detect
273319 compression from the following extensions: '.gz', '.bz2', '.zip',
274320 or '.xz' (otherwise no compression).
275321
276322 Returns
277323 -------
278- string or None :
279- compression method
324+ string or None
280325
281326 Raises
282327 ------
283- ValueError on invalid compression specified
328+ ValueError on invalid compression specified.
284329 """
285330
286331 # No compression has been explicitly specified
@@ -312,32 +357,49 @@ def _infer_compression(
312357
313358
314359def _get_handle (
315- path_or_buf , mode , encoding = None , compression = None , memory_map = False , is_text = True
360+ path_or_buf ,
361+ mode : str ,
362+ encoding = None ,
363+ compression : Optional [Union [str , Dict [str , Any ]]] = None ,
364+ memory_map : bool = False ,
365+ is_text : bool = True ,
316366):
317367 """
318368 Get file handle for given path/buffer and mode.
319369
320370 Parameters
321371 ----------
322- path_or_buf :
323- a path (str) or buffer
372+ path_or_buf : str or file handle
373+ File path or object.
324374 mode : str
325- mode to open path_or_buf with
375+ Mode to open path_or_buf with.
326376 encoding : str or None
327- compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
328- If 'infer' and `filepath_or_buffer` is path-like, then detect
329- compression from the following extensions: '.gz', '.bz2', '.zip',
330- or '.xz' (otherwise no compression).
377+ Encoding to use.
378+ compression : str or dict, default None
379+ If string, specifies compression mode. If dict, value at key 'method'
380+ specifies compression mode. Compression mode must be one of {'infer',
381+ 'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer'
382+ and `filepath_or_buffer` is path-like, then detect compression from
383+ the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
384+ no compression). If dict and compression mode is 'zip' or inferred as
385+ 'zip', other entries passed as additional compression options.
386+
387+ .. versionchanged:: 1.0.0
388+
389+ May now be a dict with key 'method' as compression mode
390+ and other keys as compression options if compression
391+ mode is 'zip'.
392+
331393 memory_map : boolean, default False
332394 See parsers._parser_params for more information.
333395 is_text : boolean, default True
334396 whether file/buffer is in text format (csv, json, etc.), or in binary
335- mode (pickle, etc.)
397+ mode (pickle, etc.).
336398
337399 Returns
338400 -------
339401 f : file-like
340- A file-like object
402+ A file-like object.
341403 handles : list of file-like objects
342404 A list of file-like object that were opened in this function.
343405 """
@@ -346,15 +408,16 @@ def _get_handle(
346408
347409 need_text_wrapping = (BufferedIOBase , S3File )
348410 except ImportError :
349- need_text_wrapping = BufferedIOBase
411+ need_text_wrapping = BufferedIOBase # type: ignore
350412
351- handles = list ()
413+ handles = list () # type: List[IO]
352414 f = path_or_buf
353415
354416 # Convert pathlib.Path/py.path.local or string
355417 path_or_buf = _stringify_path (path_or_buf )
356418 is_path = isinstance (path_or_buf , str )
357419
420+ compression , compression_args = _get_compression_method (compression )
358421 if is_path :
359422 compression = _infer_compression (path_or_buf , compression )
360423
@@ -376,7 +439,7 @@ def _get_handle(
376439
377440 # ZIP Compression
378441 elif compression == "zip" :
379- zf = BytesZipFile (path_or_buf , mode )
442+ zf = BytesZipFile (path_or_buf , mode , ** compression_args )
380443 # Ensure the container is closed as well.
381444 handles .append (zf )
382445 if zf .mode == "w" :
@@ -429,9 +492,9 @@ def _get_handle(
429492
430493 if memory_map and hasattr (f , "fileno" ):
431494 try :
432- g = MMapWrapper (f )
495+ wrapped = MMapWrapper (f )
433496 f .close ()
434- f = g
497+ f = wrapped
435498 except Exception :
436499 # we catch any errors that may have occurred
437500 # because that is consistent with the lower-level
@@ -456,15 +519,19 @@ def __init__(
456519 self ,
457520 file : FilePathOrBuffer ,
458521 mode : str ,
459- compression : int = zipfile . ZIP_DEFLATED ,
522+ archive_name : Optional [ str ] = None ,
460523 ** kwargs
461524 ):
462525 if mode in ["wb" , "rb" ]:
463526 mode = mode .replace ("b" , "" )
464- super ().__init__ (file , mode , compression , ** kwargs )
527+ self .archive_name = archive_name
528+ super ().__init__ (file , mode , zipfile .ZIP_DEFLATED , ** kwargs )
465529
def write(self, data):
    """
    Store ``data`` as a member of the zip archive.

    The member is named ``self.archive_name`` when that attribute is not
    None; otherwise ``self.filename`` is used as the member name.
    """
    member_name = self.filename if self.archive_name is None else self.archive_name
    super().writestr(member_name, data)
468535
469536 @property
470537 def closed (self ):
0 commit comments