From 32b825b549e1f97d01ddad769cddc5a36800d3a4 Mon Sep 17 00:00:00 2001 From: Aled Owen Date: Wed, 19 Aug 2020 11:43:27 +0100 Subject: [PATCH] Modify parsing of ff headers to avoid numpy.fromfile. --- lib/iris/fileformats/_ff.py | 46 ++++++++++++++++--- .../tests/unit/fileformats/ff/test_FF2PP.py | 8 ++-- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/lib/iris/fileformats/_ff.py b/lib/iris/fileformats/_ff.py index 37a4e76cbd..08f3e3f246 100644 --- a/lib/iris/fileformats/_ff.py +++ b/lib/iris/fileformats/_ff.py @@ -329,7 +329,7 @@ def __init__(self, filename, word_depth=DEFAULT_FF_WORD_DEPTH): # Read the FF header data with open(filename, "rb") as ff_file: # typically 64-bit words (aka. int64 or ">i8") - header_data = np.fromfile( + header_data = _parse_binary_stream( ff_file, dtype=">i{0}".format(word_depth), count=FF_HEADER_DEPTH, @@ -351,19 +351,19 @@ def __init__(self, filename, word_depth=DEFAULT_FF_WORD_DEPTH): ff_file.seek((addr[0] - 1) * word_depth, os.SEEK_SET) if len(addr) == 2: if elem == "integer_constants": - res = np.fromfile( + res = _parse_binary_stream( ff_file, dtype=">i{0}".format(word_depth), count=addr[1], ) else: - res = np.fromfile( + res = _parse_binary_stream( ff_file, dtype=">f{0}".format(word_depth), count=addr[1], ) elif len(addr) == 3: - res = np.fromfile( + res = _parse_binary_stream( ff_file, dtype=">f{0}".format(word_depth), count=addr[1] * addr[2], @@ -695,7 +695,7 @@ def _extract_field(self): ff_file_seek(table_offset, os.SEEK_SET) # Read the current PP header entry from the FF LOOKUP table. - header_longs = np.fromfile( + header_longs = _parse_binary_stream( ff_file, dtype=">i{0}".format(self._word_depth), count=pp.NUM_LONG_HEADERS, @@ -704,7 +704,7 @@ def _extract_field(self): if header_longs[0] == _FF_LOOKUP_TABLE_TERMINATE: # There are no more FF LOOKUP table entries to read. 
def _parse_binary_stream(file_like, dtype=np.float64, count=-1):
    """
    Replacement for :func:`numpy.fromfile` due to python3 performance issues.

    Args:

    * file_like - Standard python file_like object, opened in binary mode.
        It must support ``readinto`` (and ``read`` when ``count`` is
        negative).

    Kwargs:

    * dtype - Data type to be parsed out, used to work out bytes read in.
        Anything accepted by :class:`numpy.dtype` is allowed.

    * count - The number of values required to be generated from the parsing.
        The default is -1, which will read the entire remaining contents of
        the file_like object and generate as many values as possible.

    Returns:
        A writable one-dimensional :class:`numpy.ndarray` of the requested
        dtype. As with :func:`numpy.fromfile`, if the stream ends early the
        array holds only the complete items that were actually read; any
        trailing partial item is discarded.

    """

    # There are a wide range of types supported, we just need to know the byte
    # size of the object, so we just make sure we've got an instance of a
    # np.dtype
    if not isinstance(dtype, np.dtype):
        dtype = np.dtype(dtype)
    itemsize = dtype.itemsize

    if count < 0:
        # "Read everything": the size is unknown up front, so take whatever
        # is left in the stream. Copying into a bytearray keeps the resulting
        # numpy array writable.
        _buffer = bytearray(file_like.read())
        bytes_read = len(_buffer)
    else:
        # Allocate bytearray for the file to be read into, allowing the numpy
        # array to be writable.
        _buffer = bytearray(count * itemsize)
        view = memoryview(_buffer)
        bytes_read = 0
        # readinto may legitimately deliver fewer bytes than requested, so
        # keep going until the buffer is full or the stream is exhausted.
        while bytes_read < len(_buffer):
            chunk = file_like.readinto(view[bytes_read:])
            if not chunk:
                break
            bytes_read += chunk

    # Only expose complete items that really came from the stream, rather
    # than zero padding left over from the pre-allocated buffer.
    items_read = bytes_read // itemsize

    # Let numpy do the heavy lifting once we've sorted the file reading.
    return np.frombuffer(_buffer, dtype=dtype, count=items_read)
diff --git a/lib/iris/tests/unit/fileformats/ff/test_FF2PP.py b/lib/iris/tests/unit/fileformats/ff/test_FF2PP.py index 86c2615da6..49454a3594 100644 --- a/lib/iris/tests/unit/fileformats/ff/test_FF2PP.py +++ b/lib/iris/tests/unit/fileformats/ff/test_FF2PP.py @@ -87,9 +87,11 @@ def mock_for_extract_field(self, fields, x=None, y=None): ff2pp._ff_header.grid = mock.Mock(return_value=grid) open_func = "builtins.open" - with mock.patch("numpy.fromfile", return_value=[0]), mock.patch( - open_func - ), mock.patch("struct.unpack_from", return_value=[4]), mock.patch( + with mock.patch( + "iris.fileformats._ff._parse_binary_stream", return_value=[0] + ), mock.patch(open_func), mock.patch( + "struct.unpack_from", return_value=[4] + ), mock.patch( "iris.fileformats.pp.make_pp_field", side_effect=fields ), mock.patch( "iris.fileformats._ff.FF2PP._payload", return_value=(0, 0)