diff --git a/appveyor.yml b/appveyor.yml index 3bb9c3d074..e1ecfad43e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -19,6 +19,9 @@ install: # the parent CMD process). - SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% + # Update install environment + - pip install --upgrade pip setuptools + # Install the dependencies of the project. - pip install numpy scipy matplotlib nose h5py mock hypothesis pydicom - pip install . diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index 21cd7b40a9..fa8d1adb60 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -39,6 +39,21 @@ def get_opt_parser(): Option("-H", "--header-fields", dest="header_fields", default='all', help="Header fields (comma separated) to be printed as well (if present)"), + + Option("--ma", "--data-max-abs-diff", + dest="data_max_abs_diff", + type=float, + default=0.0, + help="Maximal absolute difference in data between files to tolerate."), + + Option("--mr", "--data-max-rel-diff", + dest="data_max_rel_diff", + type=float, + default=0.0, + help="Maximal relative difference in data between files to tolerate." 
+ " If --data-max-abs-diff is also specified, only the data points " + " with absolute difference greater than that value would be " + " considered for relative difference check."), ]) return p @@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None): return difference -def get_data_diff(files): - """Get difference between md5 values +def get_data_hash_diff(files): + """Get difference between md5 values of data Parameters ---------- @@ -115,7 +130,7 @@ def get_data_diff(files): """ md5sums = [ - hashlib.md5(np.ascontiguousarray(nib.load(f).get_data(), dtype=np.float32)).hexdigest() + hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata())).hexdigest() for f in files ] @@ -125,6 +140,84 @@ def get_data_diff(files): return md5sums +def get_data_diff(files, max_abs=0, max_rel=0): + """Get difference between data + + Parameters + ---------- + files: list of (str or ndarray) + If list of strings is provided -- they must be existing file names + max_abs: float, optional + Maximal absolute difference to tolerate. + max_rel: float, optional + Maximal relative (`abs(d1 - d2) / mean(abs(d1), abs(d2))`) difference + to tolerate. If `max_abs` is specified, data points whose absolute + difference does not exceed it are skipped in relative difference testing + + Returns + ------- + diffs: OrderedDict + An ordered dict with a record for each file that differs from any + of the files following it. Each record is a list of + difference records, one per each file pair. + Each difference record is an OrderedDict with keys + 'abs' and 'rel' showing maximal absolute and relative differences + in the file or the record ('CMP': 'incompat') if file shapes + are incompatible. 
+ """ + + # we are doomed to keep them in RAM now + data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata() + for f in files] + diffs = OrderedDict() + for i, d1 in enumerate(data[:-1]): + # populate empty entries for non-compared + diffs1 = [None] * (i + 1) + + for j, d2 in enumerate(data[i + 1:], i + 1): + + if d1.shape == d2.shape: + abs_diff = np.abs(d1 - d2) + mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5 + candidates = np.logical_or(mean_abs != 0, abs_diff != 0) + + if max_abs: + candidates[abs_diff <= max_abs] = False + + max_abs_diff = np.max(abs_diff) + if np.any(candidates): + rel_diff = abs_diff[candidates] / mean_abs[candidates] + if max_rel: + sub_thr = rel_diff <= max_rel + # Since we operated on sub-selected values already, we need + # to plug them back in + candidates[ + tuple((indexes[sub_thr] for indexes in np.where(candidates))) + ] = False + max_rel_diff = np.max(rel_diff) + else: + max_rel_diff = 0 + + if np.any(candidates): + + diff_rec = OrderedDict() # so that abs goes before relative + + diff_rec['abs'] = max_abs_diff + diff_rec['rel'] = max_rel_diff + diffs1.append(diff_rec) + else: + diffs1.append(None) + + else: + diffs1.append({'CMP': "incompat"}) + + if any(diffs1): + + diffs['DATA(diff %d:)' % (i + 1)] = diffs1 + + return diffs + + def display_diff(files, diff): """Format header differences into a nice string @@ -140,13 +233,14 @@ def display_diff(files, diff): """ output = "" field_width = "{:<15}" + filename_width = "{:<53}" value_width = "{:<55}" output += "These files are different.\n" - output += field_width.format('Field') + output += field_width.format('Field/File') - for f in files: - output += value_width.format(os.path.basename(f)) + for i, f in enumerate(files, 1): + output += "%d:%s" % (i, filename_width.format(os.path.basename(f))) output += "\n" @@ -154,7 +248,12 @@ def display_diff(files, diff): output += field_width.format(key) for item in value: - item_str = str(item) + if isinstance(item, dict): + item_str = 
', '.join('%s: %s' % i for i in item.items()) + elif item is None: + item_str = '-' + else: + item_str = str(item) # Value might start/end with some invisible spacing characters so we # would "condition" it on both ends a bit item_str = re.sub('^[ \t]+', '<', item_str) @@ -169,8 +268,37 @@ def display_diff(files, diff): return output +def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None): + assert len(files) >= 2, "Please enter at least two files" + + file_headers = [nib.load(f).header for f in files] + + # signals "all fields" + if header_fields == 'all': + # TODO: header fields might vary across file types, thus prior sensing would be needed + header_fields = file_headers[0].keys() + else: + header_fields = header_fields.split(',') + + diff = get_headers_diff(file_headers, header_fields) + + data_md5_diffs = get_data_hash_diff(files) + if data_md5_diffs: + # provide details, possibly triggering the ignore of the difference + # in data + data_diffs = get_data_diff(files, + max_abs=data_max_abs_diff, + max_rel=data_max_rel_diff) + if data_diffs: + diff['DATA(md5)'] = data_md5_diffs + diff.update(data_diffs) + + return diff + + def main(args=None, out=None): """Getting the show on the road""" + out = out or sys.stdout parser = get_opt_parser() (opts, files) = parser.parse_args(args) @@ -181,27 +309,16 @@ def main(args=None, out=None): # suppress nibabel format-compliance warnings nib.imageglobals.logger.level = 50 - assert len(files) >= 2, "Please enter at least two files" - - file_headers = [nib.load(f).header for f in files] - - # signals "all fields" - if opts.header_fields == 'all': - # TODO: header fields might vary across file types, thus prior sensing would be needed - header_fields = file_headers[0].keys() - else: - header_fields = opts.header_fields.split(',') - - diff = get_headers_diff(file_headers, header_fields) - data_diff = get_data_diff(files) - - if data_diff: - diff['DATA(md5)'] = data_diff + files_diff = diff( + 
files, + header_fields=opts.header_fields, + data_max_abs_diff=opts.data_max_abs_diff, + data_max_rel_diff=opts.data_max_rel_diff + ) - if diff: - out.write(display_diff(files, diff)) + if files_diff: + out.write(display_diff(files, files_diff)) raise SystemExit(1) - else: out.write("These files are identical.\n") raise SystemExit(0) diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py index 4aa387b6e5..12f19a1003 100644 --- a/nibabel/cmdline/tests/test_utils.py +++ b/nibabel/cmdline/tests/test_utils.py @@ -11,7 +11,7 @@ import nibabel as nib import numpy as np from nibabel.cmdline.utils import * -from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_diff +from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_hash_diff, get_data_diff from os.path import (join as pjoin) from nibabel.testing import data_path from collections import OrderedDict @@ -96,9 +96,9 @@ def test_display_diff(): ("bitpix", [np.array(8).astype(dtype="uint8"), np.array(16).astype(dtype="uint8")]) ]) - expected_output = "These files are different.\n" + "Field hellokitty.nii.gz" \ - " " \ - "privettovarish.nii.gz \n" \ + expected_output = "These files are different.\n" + "Field/File 1:hellokitty.nii.gz" \ + " " \ + "2:privettovarish.nii.gz \n" \ "datatype " \ "2 " \ "4 \n" \ @@ -114,7 +114,37 @@ def test_get_data_diff(): # testing for identical files specifically as md5 may vary by computer test_names = [pjoin(data_path, f) for f in ('standard.nii.gz', 'standard.nii.gz')] - assert_equal(get_data_diff(test_names), []) + assert_equal(get_data_hash_diff(test_names), []) + + # testing the maximum relative and absolute differences' different use cases + test_array = np.arange(16).reshape(4, 4) + test_array_2 = np.arange(1, 17).reshape(4, 4) + test_array_3 = np.arange(2, 18).reshape(4, 4) + test_array_4 = np.arange(100).reshape(10, 10) + test_array_5 = np.arange(64).reshape(8, 8) + + # same shape, 2 files + 
assert_equal(get_data_diff([test_array, test_array_2]), + OrderedDict([('DATA(diff 1:)', [None, OrderedDict([('abs', 1), ('rel', 2.0)])])])) + + # same shape, 3 files + assert_equal(get_data_diff([test_array, test_array_2, test_array_3]), + OrderedDict([('DATA(diff 1:)', [None, OrderedDict([('abs', 1), ('rel', 2.0)]), + OrderedDict([('abs', 2), ('rel', 2.0)])]), + ('DATA(diff 2:)', [None, None, + OrderedDict([('abs', 1), ('rel', 0.66666666666666663)])])])) + + # same shape, 2 files, modified maximum abs/rel + assert_equal(get_data_diff([test_array, test_array_2], max_abs=2, max_rel=2), OrderedDict()) + + # different shape, 2 files + assert_equal(get_data_diff([test_array_2, test_array_4]), + OrderedDict([('DATA(diff 1:)', [None, {'CMP': 'incompat'}])])) + + # different shape, 3 files + assert_equal(get_data_diff([test_array_4, test_array_5, test_array_2]), + OrderedDict([('DATA(diff 1:)', [None, {'CMP': 'incompat'}, {'CMP': 'incompat'}]), + ('DATA(diff 2:)', [None, None, {'CMP': 'incompat'}])])) def test_main(): diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py index 0aa404a939..a734fbeeda 100644 --- a/nibabel/tests/test_scripts.py +++ b/nibabel/tests/test_scripts.py @@ -72,10 +72,10 @@ def check_nib_diff_examples(): fnames = [pjoin(DATA_PATH, f) for f in ('standard.nii.gz', 'example4d.nii.gz')] code, stdout, stderr = run_command(['nib-diff'] + fnames, check_code=False) - checked_fields = ["Field", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end", + checked_fields = ["Field/File", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end", "xyzt_units", "cal_max", "descrip", "qform_code", "sform_code", "quatern_b", "quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x", - "srow_y", "srow_z", "DATA(md5)"] + "srow_y", "srow_z", "DATA(md5)", "DATA(diff 1:)"] for item in checked_fields: assert_true(item in stdout)