- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 19.2k
ENH: Allow compression in NDFrame.to_csv to be a dict with optional arguments (#26023) #26024
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
4e73dc4
              ab7620d
              2e782f9
              83e8834
              d238878
              b41be54
              60ea58c
              8ba9082
              0a3a9fd
              a1cb3f7
              af2a96c
              5853a28
              789751f
              5b09e6f
              68a2b4d
              c856f50
              8df6c81
              40d0252
              18a735d
              103c877
              b6c34bc
              969d387
              abfbc0f
              04ae25d
              9c22652
              56a75c2
              bbfea34
              7717f16
              779511e
              780eb04
              6c4e679
              1b567c9
              9324b63
              7cf65ee
              29374f3
              6701aa4
              0f5489d
              e04138e
              6f2bf00
              865aa81
              8d1deee
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -235,19 +235,26 @@ def file_path_to_url(path): | |
|  | ||
| def _infer_compression(filepath_or_buffer, compression): | ||
| """ | ||
| Get the compression method for filepath_or_buffer. If compression='infer', | ||
| the inferred compression method is returned. Otherwise, the input | ||
| Get the compression method for filepath_or_buffer. If compression mode is | ||
| 'infer', the inferred compression method is returned. Otherwise, the input | ||
| compression method is returned unchanged, unless it's invalid, in which | ||
| case an error is raised. | ||
|  | ||
| Parameters | ||
| ---------- | ||
| filepath_or_buffer : | ||
| a path (str) or buffer | ||
| compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} | ||
| If 'infer' and `filepath_or_buffer` is path-like, then detect | ||
| compression from the following extensions: '.gz', '.bz2', '.zip', | ||
| or '.xz' (otherwise no compression). | ||
| compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} or dict | ||
| If string, specifies compression mode. If dict, value at key 'method' | ||
| specifies compression mode. If compression mode is 'infer' and | ||
| `filepath_or_buffer` is path-like, then detect compression from the | ||
| following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no | ||
| compression). | ||
|  | ||
| .. versionchanged:: 0.25.0 | ||
|  | ||
| May now be a dict with required key 'method' specifying compression | ||
| mode | ||
|  | ||
| Returns | ||
| ------- | ||
|  | @@ -259,6 +266,14 @@ def _infer_compression(filepath_or_buffer, compression): | |
| ValueError on invalid compression specified | ||
| """ | ||
|  | ||
| # Handle compression method as dict | ||
| if isinstance(compression, dict): | ||
| try: | ||
| compression = compression['method'] | ||
| except KeyError: | ||
| raise ValueError("Compression dict must have key " | ||
| "'method'") | ||
|  | ||
| # No compression has been explicitly specified | ||
| if compression is None: | ||
| return None | ||
|  | @@ -299,10 +314,21 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, | |
| mode : str | ||
| mode to open path_or_buf with | ||
| encoding : str or None | ||
| compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None | ||
| If 'infer' and `filepath_or_buffer` is path-like, then detect | ||
| compression from the following extensions: '.gz', '.bz2', '.zip', | ||
| or '.xz' (otherwise no compression). | ||
| compression : str or dict, default None | ||
| If string, specifies compression mode. If dict, value at key 'method' | ||
| specifies compression mode. Compression mode must be one of {'infer', | ||
| 'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer' | ||
| and `filepath_or_buffer` is path-like, then detect compression from | ||
| the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise | ||
| no compression). If dict and compression mode is 'zip' or inferred as | ||
| 'zip', optional value at key 'arcname' specifies the name of the file | ||
| within ZIP archive at `path_or_buf`. | ||
|  | ||
| .. versionchanged:: 0.25.0 | ||
|  | ||
| May now be a dict with key 'method' as compression mode | ||
| and 'arcname' as CSV file name if mode is 'zip' | ||
|          | ||
|  | ||
| memory_map : boolean, default False | ||
| See parsers._parser_params for more information. | ||
| is_text : boolean, default True | ||
|  | @@ -329,28 +355,32 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, | |
| path_or_buf = _stringify_path(path_or_buf) | ||
| is_path = isinstance(path_or_buf, str) | ||
|  | ||
| compression_method = None | ||
| if is_path: | ||
| compression = _infer_compression(path_or_buf, compression) | ||
| compression_method = _infer_compression(path_or_buf, compression) | ||
|  | ||
| if compression: | ||
| if compression_method: | ||
|  | ||
| # GZ Compression | ||
| if compression == 'gzip': | ||
| if compression_method == 'gzip': | ||
| if is_path: | ||
| f = gzip.open(path_or_buf, mode) | ||
| else: | ||
| f = gzip.GzipFile(fileobj=path_or_buf) | ||
|  | ||
| # BZ Compression | ||
| elif compression == 'bz2': | ||
| elif compression_method == 'bz2': | ||
| if is_path: | ||
| f = bz2.BZ2File(path_or_buf, mode) | ||
| else: | ||
| f = bz2.BZ2File(path_or_buf) | ||
|  | ||
| # ZIP Compression | ||
| elif compression == 'zip': | ||
| zf = BytesZipFile(path_or_buf, mode) | ||
| elif compression_method == 'zip': | ||
| arcname = None | ||
|          | ||
| if isinstance(compression, dict) and 'arcname' in compression: | ||
| arcname = compression['arcname'] | ||
| *drew-heenan marked this conversation as resolved (outdated). Show resolved / Hide resolved* | ||
| zf = BytesZipFile(path_or_buf, mode, arcname=arcname) | ||
| # Ensure the container is closed as well. | ||
| handles.append(zf) | ||
| if zf.mode == 'w': | ||
|  | @@ -368,14 +398,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, | |
| .format(zip_names)) | ||
|  | ||
| # XZ Compression | ||
| elif compression == 'xz': | ||
| elif compression_method == 'xz': | ||
| f = lzma.LZMAFile(path_or_buf, mode) | ||
|  | ||
| # Unrecognized Compression | ||
| *drew-heenan marked this conversation as resolved. Show resolved / Hide resolved* | ||
| else: | ||
| msg = 'Unrecognized compression type: {}'.format(compression) | ||
| raise ValueError(msg) | ||
|  | ||
| handles.append(f) | ||
|  | ||
| elif is_path: | ||
|  | @@ -391,7 +416,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, | |
| handles.append(f) | ||
|  | ||
| # Convert BytesIO or file objects passed with an encoding | ||
| if is_text and (compression or isinstance(f, need_text_wrapping)): | ||
| if is_text and (compression_method or isinstance(f, need_text_wrapping)): | ||
| from io import TextIOWrapper | ||
| f = TextIOWrapper(f, encoding=encoding, newline='') | ||
| handles.append(f) | ||
|  | @@ -420,13 +445,16 @@ class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore | |
| bytes strings into a member of the archive. | ||
| """ | ||
| # GH 17778 | ||
| def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs): | ||
| def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, | ||
| arcname=None, **kwargs): | ||
| if mode in ['wb', 'rb']: | ||
| mode = mode.replace('b', '') | ||
| self.arcname = arcname | ||
| super(BytesZipFile, self).__init__(file, mode, compression, **kwargs) | ||
|  | ||
| def write(self, data): | ||
| super(BytesZipFile, self).writestr(self.filename, data) | ||
| arcname = self.filename if self.arcname is None else self.arcname | ||
| super(BytesZipFile, self).writestr(arcname, data) | ||
| *drew-heenan marked this conversation as resolved (outdated). Show resolved / Hide resolved* | ||
|  | ||
| @property | ||
| def closed(self): | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -36,8 +36,20 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', | |
| if path_or_buf is None: | ||
| path_or_buf = StringIO() | ||
|  | ||
| self._compression_arg = compression | ||
|          | ||
| compression_mode = compression | ||
|  | ||
| # Extract compression mode as given, if dict | ||
| if isinstance(compression, dict): | ||
| try: | ||
| compression_mode = compression['method'] | ||
| except KeyError: | ||
| raise ValueError("If dict, compression must have key " | ||
| "'method'") | ||
|  | ||
| self.path_or_buf, _, _, _ = get_filepath_or_buffer( | ||
| path_or_buf, encoding=encoding, compression=compression, mode=mode | ||
| path_or_buf, encoding=encoding, | ||
| compression=compression_mode, mode=mode | ||
| ) | ||
| self.sep = sep | ||
| self.na_rep = na_rep | ||
|  | @@ -150,7 +162,7 @@ def save(self): | |
| else: | ||
| f, handles = _get_handle(self.path_or_buf, self.mode, | ||
| encoding=self.encoding, | ||
| compression=self.compression) | ||
| compression=self._compression_arg) | ||
| close = True | ||
|  | ||
| try: | ||
|  | @@ -176,7 +188,7 @@ def save(self): | |
| else: | ||
| f, handles = _get_handle(self.path_or_buf, self.mode, | ||
| encoding=self.encoding, | ||
| compression=self.compression) | ||
| compression=self._compression_arg) | ||
| f.write(buf) | ||
| close = True | ||
| if close: | ||
|  | ||
Uh oh!
There was an error while loading. Please reload this page.