From 46122c42418e49514036e73eec1ccb1880d141e9 Mon Sep 17 00:00:00 2001 From: GaelVaroquaux Date: Thu, 14 Nov 2013 17:26:17 +0100 Subject: [PATCH 1/3] ENH: fast reads on large gzip files Fixes #209 --- nibabel/openers.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nibabel/openers.py b/nibabel/openers.py index 5de78bf8e2..01b6b9851b 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -13,6 +13,12 @@ import gzip import bz2 +def _gzip_open(fileish, *args, **kwargs): + # open gzip files with faster reads on large files using larger chuncks + gzip_file = gzip.open(fileish, *args, **kwargs) + gzip_file.max_read_chunk = 100 * 1024 * 1024 # 100Mb + return gzip_file + class Opener(object): """ Class to accept, maybe open, and context-manage file-likes / filenames From dee72f815550dd4ed41f56482cf25b21e2817df6 Mon Sep 17 00:00:00 2001 From: GaelVaroquaux Date: Mon, 18 Nov 2013 14:09:09 +0100 Subject: [PATCH 2/3] BUG: fix previous commit Actually use the opening logic --- nibabel/openers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/openers.py b/nibabel/openers.py index 01b6b9851b..e3931212a6 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -38,7 +38,7 @@ class Opener(object): passed to opening method when `fileish` is str. Change of defaults as for \*args """ - gz_def = (gzip.open, ('mode', 'compresslevel')) + gz_def = (_gzip_open, ('mode', 'compresslevel')) bz2_def = (bz2.BZ2File, ('mode', 'buffering', 'compresslevel')) compress_ext_map = { '.gz': gz_def, From 69edcc966ebbe9f28cba1ff8bc9d40fd1c1c986d Mon Sep 17 00:00:00 2001 From: Matthew Brett Date: Tue, 29 Jul 2014 12:35:41 -0700 Subject: [PATCH 3/3] RF: put gzip chunk constant into module global Allows user to set max chunk size. 
--- nibabel/openers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/nibabel/openers.py b/nibabel/openers.py index e3931212a6..1b16ba1476 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -13,10 +13,15 @@ import gzip import bz2 +# The largest memory chunk that gzip can use for reads +GZIP_MAX_READ_CHUNK = 100 * 1024 * 1024 # 100Mb + + def _gzip_open(fileish, *args, **kwargs): - # open gzip files with faster reads on large files using larger chuncks + # open gzip files with faster reads on large files using larger chunks + # See https://github.com/nipy/nibabel/pull/210 for discussion gzip_file = gzip.open(fileish, *args, **kwargs) - gzip_file.max_read_chunk = 100 * 1024 * 1024 # 100Mb + gzip_file.max_read_chunk = GZIP_MAX_READ_CHUNK return gzip_file