Merge pull request #210 from GaelVaroquaux/fast_gzip_read

matthew-brett · matthew-brett · commit 5e00fd2fb7e9 · 2014-10-13T13:05:51.000-07:00
MRG: fast reads on large gzip files. Fixes #209
diff --git a/nibabel/openers.py b/nibabel/openers.py
@@ -13,6 +13,17 @@
 import gzip
 import bz2
 
+# The largest memory chunk (in bytes) that gzip can use for reads
+GZIP_MAX_READ_CHUNK = 100 * 1024 * 1024 # 100 MiB
+
+
+def _gzip_open(fileish, *args, **kwargs):
+    # Open with gzip.open, then set max_read_chunk on the returned file so that
+    # large files read faster; see https://github.com/nipy/nibabel/pull/210
+    gzip_file = gzip.open(fileish, *args, **kwargs)
+    gzip_file.max_read_chunk = GZIP_MAX_READ_CHUNK
+    return gzip_file
+
 
 class Opener(object):
     """ Class to accept, maybe open, and context-manage file-likes / filenames
@@ -32,7 +43,7 @@ class Opener(object):
         passed to opening method when `fileish` is str.  Change of defaults as
         for \*args
     """
-    gz_def = (gzip.open, ('mode', 'compresslevel'))
+    gz_def = (_gzip_open, ('mode', 'compresslevel'))
     bz2_def = (bz2.BZ2File, ('mode', 'buffering', 'compresslevel'))
     compress_ext_map = {
         '.gz': gz_def,