From cd5a741c17c698194dfd99c9ff1f6e0bb338c6f8 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 14 Sep 2018 16:38:33 -0400 Subject: [PATCH 01/12] RF+ENH: nib-diff - allow to specify absolute and/or relative maximal differences to tolerate So now it should be possible to get an idea on how much data in the given files differs: $> nib-diff --ma 0.000001 --mr .001 ./tests-run/output/./sub-1_T1w_5mm_noise_corrected.nii.gz /tmp/sub-1_T1w_5mm_noise_corrected.nii.gz These files are different. Field 1:sub-1_T1w_5mm_noise_corrected.nii.gz 2:sub-1_T1w_5mm_noise_corrected.nii.gz DATA(md5) 65df09c06b236342eaf7e2fe57aabf55 3c6e9069e6e054e714f2894419848df0 DATA(diff 1:) - abs: 7.6293945e-06, rel: 0.002224694 --- nibabel/cmdline/diff.py | 145 +++++++++++++++++++++++----- nibabel/cmdline/tests/test_utils.py | 4 +- 2 files changed, 123 insertions(+), 26 deletions(-) diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index 21cd7b40a9..fe6bc7bce5 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -39,6 +39,21 @@ def get_opt_parser(): Option("-H", "--header-fields", dest="header_fields", default='all', help="Header fields (comma separated) to be printed as well (if present)"), + + Option("--ma", "--data-max-abs-diff", + dest="data_max_abs_diff", + type=float, + default=0.0, + help="Maximal absolute difference in data between files to tolerate."), + + Option("--mr", "--data-max-rel-diff", + dest="data_max_rel_diff", + type=float, + default=0.0, + help="Maximal relative difference in data between files to tolerate." + " If also --data-max-abs-diff specified, only the data points " + " with absolute difference greater than that value would be " + " considered for relative difference check."), ]) return p @@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None): return difference -def get_data_diff(files): - """Get difference between md5 values +def get_data_md5_diff(files): + """Get difference between md5 values of data Parameters ---------- @@ -125,6 +140,65 @@ def get_data_diff(files): return md5sums +def get_data_diff(files, max_abs=0, max_rel=0): + """Get difference between data + + Parameters + ---------- + max_abs: float, optional + Maximal absolute difference to tolerate. + max_rel: float, optional + Maximal relative (`abs(diff)/mean(diff)`) difference to tolerate. + If `max_abs` is specified, then those data points with lesser than that + absolute difference, are not considered for relative difference testing + + Returns + ------- + TODO + """ + # we are doomed to keep them in RAM now + data = [nib.load(f).get_data() for f in files] + diffs = OrderedDict() + for i, d1 in enumerate(data[:-1]): + # populate empty entries for non-compared + diffs1 = [None] * (i+1) + + for j, d2 in enumerate(data[i+1:], i + 1): + abs_diff = np.abs(d1 - d2) + mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5 + candidates = np.logical_or(mean_abs != 0, abs_diff != 0) + + if max_abs: + candidates[abs_diff <= max_abs] = False + + max_abs_diff = np.max(abs_diff) + if np.any(candidates): + rel_diff = abs_diff[candidates] / mean_abs[candidates] + if max_rel: + sub_thr = rel_diff <= max_rel + # Since we operated on sub-selected values already, we need + # to plug them back in + candidates[ + tuple((indexes[sub_thr] for indexes in np.where(candidates))) + ] = False + max_rel_diff = np.max(rel_diff) + else: + max_rel_diff = 0 + + if np.any(candidates): + diff_rec = OrderedDict() # so that abs goes before relative + diff_rec['abs'] = max_abs_diff + diff_rec['rel'] = max_rel_diff + diffs1.append(diff_rec) + else: + diffs1.append(None) + + if any(diffs1): + diffs['DATA(diff %d:)' % (i+1)] = diffs1 + + return diffs + + def display_diff(files, diff): """Format header differences into a nice string @@ -145,8 +219,8 @@ def display_diff(files, diff): output += "These files are different.\n" output += field_width.format('Field') - for f in files: - output += value_width.format(os.path.basename(f)) + for i, f in enumerate(files, 1): + output += "%d:%s" % (i, value_width.format(os.path.basename(f))) output += "\n" @@ -154,7 +228,12 @@ def display_diff(files, diff): output += field_width.format(key) for item in value: - item_str = str(item) + if isinstance(item, dict): + item_str = ', '.join('%s: %s' % i for i in item.items()) + elif item is None: + item_str = '-' + else: + item_str = str(item) # Value might start/end with some invisible spacing characters so we # would "condition" it on both ends a bit item_str = re.sub('^[ \t]+', '<', item_str) @@ -169,8 +248,37 @@ def display_diff(files, diff): return output +def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None): + assert len(files) >= 2, "Please enter at least two files" + + file_headers = [nib.load(f).header for f in files] + + # signals "all fields" + if header_fields == 'all': + # TODO: header fields might vary across file types, thus prior sensing would be needed + header_fields = file_headers[0].keys() + else: + header_fields = header_fields.split(',') + + diff = get_headers_diff(file_headers, header_fields) + + data_md5_diffs = get_data_md5_diff(files) + if data_md5_diffs: + # provide details, possibly triggering the ignore of the difference + # in data + data_diffs = get_data_diff(files, + max_abs=data_max_abs_diff, + max_rel=data_max_rel_diff) + if data_diffs: + diff['DATA(md5)'] = data_md5_diffs + diff.update(data_diffs) + + return diff + + def main(args=None, out=None): """Getting the show on the road""" + out = out or sys.stdout parser = get_opt_parser() (opts, files) = parser.parse_args(args) @@ -181,27 +289,16 @@ def main(args=None, out=None): # suppress nibabel format-compliance warnings nib.imageglobals.logger.level = 50 - assert len(files) >= 2, "Please enter at least two files" - - file_headers = [nib.load(f).header for f in files] - - # signals "all fields" - if opts.header_fields == 'all': - # TODO: header fields might vary across file types, thus prior sensing would be needed - header_fields = file_headers[0].keys() - else: - header_fields = opts.header_fields.split(',') + files_diff = diff( + files, + header_fields=opts.header_fields, + data_max_abs_diff=opts.data_max_abs_diff, + data_max_rel_diff=opts.data_max_rel_diff + ) - diff = get_headers_diff(file_headers, header_fields) - data_diff = get_data_diff(files) - - if data_diff: - diff['DATA(md5)'] = data_diff - - if diff: - out.write(display_diff(files, diff)) + if files_diff: + out.write(display_diff(files, files_diff)) raise SystemExit(1) - else: out.write("These files are identical.\n") raise SystemExit(0) diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py index 4aa387b6e5..8ee891ed4c 100644 --- a/nibabel/cmdline/tests/test_utils.py +++ b/nibabel/cmdline/tests/test_utils.py @@ -11,7 +11,7 @@ import nibabel as nib import numpy as np from nibabel.cmdline.utils import * -from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_diff +from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff from os.path import (join as pjoin) from nibabel.testing import data_path from collections import OrderedDict @@ -114,7 +114,7 @@ def test_get_data_diff(): # testing for identical files specifically as md5 may vary by computer test_names = [pjoin(data_path, f) for f in ('standard.nii.gz', 'standard.nii.gz')] - assert_equal(get_data_diff(test_names), []) + assert_equal(get_data_md5_diff(test_names), []) def test_main(): From 018eceb71a7708d866af080dfafc1936ae8304ae Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 14 Sep 2018 16:41:42 -0400 Subject: [PATCH 02/12] ENH: nib-diff Field/File not just Field in the header --- nibabel/cmdline/diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index fe6bc7bce5..b879e327ef 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -217,7 +217,7 @@ def display_diff(files, diff): value_width = "{:<55}" output += "These files are different.\n" - output += field_width.format('Field') + output += field_width.format('Field/File') for i, f in enumerate(files, 1): output += "%d:%s" % (i, value_width.format(os.path.basename(f))) From 833b4dfccddf8d8ff41f457c7cff92b0b72a5722 Mon Sep 17 00:00:00 2001 From: "Christopher P. Cheng" Date: Fri, 21 Sep 2018 13:37:57 -0400 Subject: [PATCH 03/12] changed as commented out in the pull request --- nibabel/cmdline/diff.py | 147 +++++++++++++++++++++++----- nibabel/cmdline/tests/test_utils.py | 8 +- nibabel/tests/test_scripts.py | 2 +- 3 files changed, 127 insertions(+), 30 deletions(-) mode change 100644 => 100755 nibabel/cmdline/tests/test_utils.py mode change 100644 => 100755 nibabel/tests/test_scripts.py diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index 21cd7b40a9..364b3fe0da 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -39,6 +39,21 @@ def get_opt_parser(): Option("-H", "--header-fields", dest="header_fields", default='all', help="Header fields (comma separated) to be printed as well (if present)"), + + Option("--ma", "--data-max-abs-diff", + dest="data_max_abs_diff", + type=float, + default=0.0, + help="Maximal absolute difference in data between files to tolerate."), + + Option("--mr", "--data-max-rel-diff", + dest="data_max_rel_diff", + type=float, + default=0.0, + help="Maximal relative difference in data between files to tolerate." + " If also --data-max-abs-diff specified, only the data points " + " with absolute difference greater than that value would be " + " considered for relative difference check."), ]) return p @@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None): return difference -def get_data_diff(files): - """Get difference between md5 values +def get_data_md5_diff(files): + """Get difference between md5 values of data Parameters ---------- @@ -125,6 +140,65 @@ def get_data_diff(files): return md5sums +def get_data_diff(files, max_abs=0, max_rel=0): + """Get difference between data + + Parameters + ---------- + max_abs: float, optional + Maximal absolute difference to tolerate. + max_rel: float, optional + Maximal relative (`abs(diff)/mean(diff)`) difference to tolerate. + If `max_abs` is specified, then those data points with lesser than that + absolute difference, are not considered for relative difference testing + + Returns + ------- + TODO + """ + # we are doomed to keep them in RAM now + data = [nib.load(f).get_data() for f in files] + diffs = OrderedDict() + for i, d1 in enumerate(data[:-1]): + # populate empty entries for non-compared + diffs1 = [None] * (i + 1) + + for j, d2 in enumerate(data[i + 1:], i + 1): + abs_diff = np.abs(d1 - d2) + mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5 + candidates = np.logical_or(mean_abs != 0, abs_diff != 0) + + if max_abs: + candidates[abs_diff <= max_abs] = False + + max_abs_diff = np.max(abs_diff) + if np.any(candidates): + rel_diff = abs_diff[candidates] / mean_abs[candidates] + if max_rel: + sub_thr = rel_diff <= max_rel + # Since we operated on sub-selected values already, we need + # to plug them back in + candidates[ + tuple((indexes[sub_thr] for indexes in np.where(candidates))) + ] = False + max_rel_diff = np.max(rel_diff) + else: + max_rel_diff = 0 + + if np.any(candidates): + diff_rec = OrderedDict() # so that abs goes before relative + diff_rec['abs'] = max_abs_diff + diff_rec['rel'] = max_rel_diff + diffs1.append(diff_rec) + else: + diffs1.append(None) + + if any(diffs1): + diffs['DATA(diff %d:)' % (i + 1)] = diffs1 + + return diffs + + def display_diff(files, diff): """Format header differences into a nice string @@ -143,10 +217,10 @@ def display_diff(files, diff): value_width = "{:<55}" output += "These files are different.\n" - output += field_width.format('Field') + output += field_width.format('Field/File') - for f in files: - output += value_width.format(os.path.basename(f)) + for i, f in enumerate(files, 1): + output += "%d:%s" % (i, value_width.format(os.path.basename(f))) output += "\n" @@ -154,7 +228,12 @@ def display_diff(files, diff): output += field_width.format(key) for item in value: - item_str = str(item) + if isinstance(item, dict): + item_str = ', '.join('%s: %s' % i for i in item.items()) + elif item is None: + item_str = '-' + else: + item_str = str(item) # Value might start/end with some invisible spacing characters so we # would "condition" it on both ends a bit item_str = re.sub('^[ \t]+', '<', item_str) @@ -169,8 +248,37 @@ def display_diff(files, diff): return output +def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None): + assert len(files) >= 2, "Please enter at least two files" + + file_headers = [nib.load(f).header for f in files] + + # signals "all fields" + if header_fields == 'all': + # TODO: header fields might vary across file types, thus prior sensing would be needed + header_fields = file_headers[0].keys() + else: + header_fields = header_fields.split(',') + + diff = get_headers_diff(file_headers, header_fields) + + data_md5_diffs = get_data_md5_diff(files) + if data_md5_diffs: + # provide details, possibly triggering the ignore of the difference + # in data + data_diffs = get_data_diff(files, + max_abs=data_max_abs_diff, + max_rel=data_max_rel_diff) + if data_diffs: + diff['DATA(md5)'] = data_md5_diffs + diff.update(data_diffs) + + return diff + + def main(args=None, out=None): """Getting the show on the road""" + out = out or sys.stdout parser = get_opt_parser() (opts, files) = parser.parse_args(args) @@ -181,27 +289,16 @@ def main(args=None, out=None): # suppress nibabel format-compliance warnings nib.imageglobals.logger.level = 50 - assert len(files) >= 2, "Please enter at least two files" - - file_headers = [nib.load(f).header for f in files] - - # signals "all fields" - if opts.header_fields == 'all': - # TODO: header fields might vary across file types, thus prior sensing would be needed - header_fields = file_headers[0].keys() - else: - header_fields = opts.header_fields.split(',') + files_diff = diff( + files, + header_fields=opts.header_fields, + data_max_abs_diff=opts.data_max_abs_diff, + data_max_rel_diff=opts.data_max_rel_diff + ) - diff = get_headers_diff(file_headers, header_fields) - data_diff = get_data_diff(files) - - if data_diff: - diff['DATA(md5)'] = data_diff - - if diff: - out.write(display_diff(files, diff)) + if files_diff: + out.write(display_diff(files, files_diff)) raise SystemExit(1) - else: out.write("These files are identical.\n") raise SystemExit(0) diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py old mode 100644 new mode 100755 index 4aa387b6e5..45c3d32cc6 --- a/nibabel/cmdline/tests/test_utils.py +++ b/nibabel/cmdline/tests/test_utils.py @@ -11,7 +11,7 @@ import nibabel as nib import numpy as np from nibabel.cmdline.utils import * -from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_diff +from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff from os.path import (join as pjoin) from nibabel.testing import data_path from collections import OrderedDict @@ -96,8 +96,8 @@ def test_display_diff(): ("bitpix", [np.array(8).astype(dtype="uint8"), np.array(16).astype(dtype="uint8")]) ]) - expected_output = "These files are different.\n" + "Field hellokitty.nii.gz" \ - " " \ + expected_output = "These files are different.\n" + "Field/File hellokitty.nii.gz" \ + " " \ "privettovarish.nii.gz \n" \ "datatype " \ "2 " \ @@ -114,7 +114,7 @@ def test_get_data_diff(): # testing for identical files specifically as md5 may vary by computer test_names = [pjoin(data_path, f) for f in ('standard.nii.gz', 'standard.nii.gz')] - assert_equal(get_data_diff(test_names), []) + assert_equal(get_data_md5_diff(test_names), []) def test_main(): diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py old mode 100644 new mode 100755 index 0aa404a939..6aba6c4038 --- a/nibabel/tests/test_scripts.py +++ b/nibabel/tests/test_scripts.py @@ -72,7 +72,7 @@ def check_nib_diff_examples(): fnames = [pjoin(DATA_PATH, f) for f in ('standard.nii.gz', 'example4d.nii.gz')] code, stdout, stderr = run_command(['nib-diff'] + fnames, check_code=False) - checked_fields = ["Field", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end", + checked_fields = ["Field/File", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end", "xyzt_units", "cal_max", "descrip", "qform_code", "sform_code", "quatern_b", "quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x", "srow_y", "srow_z", "DATA(md5)"] From 1e33ea7db4ff54f06b38bfcfbf6d988f82604bc6 Mon Sep 17 00:00:00 2001 From: "Christopher P. Cheng" Date: Tue, 25 Sep 2018 11:01:28 -0400 Subject: [PATCH 04/12] RF: anticipated files of different shapes, fixed table display, corrected tests --- nibabel/cmdline/diff.py | 64 +++++++++++++++-------------- nibabel/cmdline/tests/test_utils.py | 6 +-- nibabel/tests/test_scripts.py | 5 ++- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index 171b5ae8c3..edfbb0c5d0 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -165,42 +165,45 @@ def get_data_diff(files, max_abs=0, max_rel=0): for j, d2 in enumerate(data[i + 1:], i + 1): - abs_diff = np.abs(d1 - d2) - mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5 - candidates = np.logical_or(mean_abs != 0, abs_diff != 0) - - if max_abs: - candidates[abs_diff <= max_abs] = False - - max_abs_diff = np.max(abs_diff) - if np.any(candidates): - rel_diff = abs_diff[candidates] / mean_abs[candidates] - if max_rel: - sub_thr = rel_diff <= max_rel - # Since we operated on sub-selected values already, we need - # to plug them back in - candidates[ - tuple((indexes[sub_thr] for indexes in np.where(candidates))) - ] = False - max_rel_diff = np.max(rel_diff) - else: - max_rel_diff = 0 - - if np.any(candidates): + if d1.shape == d2.shape: + abs_diff = np.abs(d1 - d2) + mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5 + candidates = np.logical_or(mean_abs != 0, abs_diff != 0) + + if max_abs: + candidates[abs_diff <= max_abs] = False + + max_abs_diff = np.max(abs_diff) + if np.any(candidates): + rel_diff = abs_diff[candidates] / mean_abs[candidates] + if max_rel: + sub_thr = rel_diff <= max_rel + # Since we operated on sub-selected values already, we need + # to plug them back in + candidates[ + tuple((indexes[sub_thr] for indexes in np.where(candidates))) + ] = False + max_rel_diff = np.max(rel_diff) + else: + max_rel_diff = 0 + + if np.any(candidates): + + diff_rec = OrderedDict() # so that abs goes before relative + + diff_rec['abs'] = max_abs_diff + diff_rec['rel'] = max_rel_diff + diffs1.append(diff_rec) + else: + diffs1.append(None) - diff_rec = OrderedDict() # so that abs goes before relative - - diff_rec['abs'] = max_abs_diff - diff_rec['rel'] = max_rel_diff - diffs1.append(diff_rec) else: - diffs1.append(None) + diffs1.append({'CMP': "incompat"}) if any(diffs1): diffs['DATA(diff %d:)' % (i + 1)] = diffs1 - return diffs @@ -219,13 +222,14 @@ def display_diff(files, diff): """ output = "" field_width = "{:<15}" + filename_width = "{:<53}" value_width = "{:<55}" output += "These files are different.\n" output += field_width.format('Field/File') for i, f in enumerate(files, 1): - output += "%d:%s" % (i, value_width.format(os.path.basename(f))) + output += "%d:%s" % (i, filename_width.format(os.path.basename(f))) output += "\n" diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py index 45c3d32cc6..6d4fec2344 100755 --- a/nibabel/cmdline/tests/test_utils.py +++ b/nibabel/cmdline/tests/test_utils.py @@ -96,9 +96,9 @@ def test_display_diff(): ("bitpix", [np.array(8).astype(dtype="uint8"), np.array(16).astype(dtype="uint8")]) ]) - expected_output = "These files are different.\n" + "Field/File hellokitty.nii.gz" \ - " " \ - "privettovarish.nii.gz \n" \ + expected_output = "These files are different.\n" + "Field/File 1:hellokitty.nii.gz" \ + " " \ + "2:privettovarish.nii.gz \n" \ "datatype " \ "2 " \ "4 \n" \ diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py index 6aba6c4038..537c1df992 100755 --- a/nibabel/tests/test_scripts.py +++ b/nibabel/tests/test_scripts.py @@ -75,8 +75,11 @@ def check_nib_diff_examples(): checked_fields = ["Field/File", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end", "xyzt_units", "cal_max", "descrip", "qform_code", "sform_code", "quatern_b", "quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x", - "srow_y", "srow_z", "DATA(md5)"] + "srow_y", "srow_z", "DATA(md5)", "DATA(diff 1:)"] for item in checked_fields: + if item not in stdout: + print(item) + print(stdout) assert_true(item in stdout) fnames2 = [pjoin(DATA_PATH, f) From 76ca32f9ecb9110c67efd425c33c3ed7232e257e Mon Sep 17 00:00:00 2001 From: "Christopher P. Cheng" Date: Thu, 27 Sep 2018 12:14:41 -0400 Subject: [PATCH 05/12] elaborated docstring, modified get_data_diff to allow direct array input, added tests for coverage --- nibabel/cmdline/diff.py | 4 +++- nibabel/cmdline/tests/test_utils.py | 32 ++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index edfbb0c5d0..caa8e385fd 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -145,6 +145,8 @@ def get_data_diff(files, max_abs=0, max_rel=0): Parameters ---------- + files: list of (str or ndarray) + If list of strings is provided -- they must be existing file names max_abs: float, optional Maximal absolute difference to tolerate. max_rel: float, optional @@ -157,7 +159,7 @@ def get_data_diff(files, max_abs=0, max_rel=0): TODO """ # we are doomed to keep them in RAM now - data = [nib.load(f).get_data() for f in files] + data = [f if isinstance(f, np.ndarray) else nib.load(f).get_data() for f in files] diffs = OrderedDict() for i, d1 in enumerate(data[:-1]): # populate empty entries for non-compared diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py index 6d4fec2344..50d72a5ade 100755 --- a/nibabel/cmdline/tests/test_utils.py +++ b/nibabel/cmdline/tests/test_utils.py @@ -11,7 +11,7 @@ import nibabel as nib import numpy as np from nibabel.cmdline.utils import * -from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff +from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff, get_data_diff from os.path import (join as pjoin) from nibabel.testing import data_path from collections import OrderedDict @@ -116,6 +116,36 @@ def test_get_data_diff(): for f in ('standard.nii.gz', 'standard.nii.gz')] assert_equal(get_data_md5_diff(test_names), []) + # testing the maximum relative and absolute differences' different use cases + test_array = np.arange(16).reshape(4, 4) + test_array_2 = np.arange(1, 17).reshape(4, 4) + test_array_3 = np.arange(2, 18).reshape(4, 4) + test_array_4 = np.arange(100).reshape(10, 10) + test_array_5 = np.arange(64).reshape(8, 8) + + # same shape, 2 files + assert_equal(get_data_diff([test_array, test_array_2]), + OrderedDict([('DATA(diff 1:)', [None, OrderedDict([('abs', 1), ('rel', 2.0)])])])) + + # same shape, 3 files + assert_equal(get_data_diff([test_array, test_array_2, test_array_3]), + OrderedDict([('DATA(diff 1:)', [None, OrderedDict([('abs', 1), ('rel', 2.0)]), + OrderedDict([('abs', 2), ('rel', 2.0)])]), + ('DATA(diff 2:)', [None, None, + OrderedDict([('abs', 1), ('rel', 0.66666666666666663)])])])) + + # same shape, 2 files, modified maximum abs/rel + assert_equal(get_data_diff([test_array, test_array_2], max_abs=2, max_rel=2), OrderedDict()) + + # different shape, 2 files + assert_equal(get_data_diff([test_array_2, test_array_4]), + OrderedDict([('DATA(diff 1:)', [None, {'CMP': 'incompat'}])])) + + # different shape, 3 files + assert_equal(get_data_diff([test_array_4, test_array_5, test_array_2]), + OrderedDict([('DATA(diff 1:)', [None, {'CMP': 'incompat'}, {'CMP': 'incompat'}]), + ('DATA(diff 2:)', [None, None, {'CMP': 'incompat'}])])) + def test_main(): test_names = [pjoin(data_path, f) From 0aa63705819a6af1a843bf57ffc00e906e0260fd Mon Sep 17 00:00:00 2001 From: "Christopher P. Cheng" Date: Fri, 28 Sep 2018 12:59:21 -0400 Subject: [PATCH 06/12] added to diff documentation, undid executable change, took out debugging script in test_scripts --- nibabel/cmdline/diff.py | 3 ++- nibabel/cmdline/tests/test_utils.py | 0 nibabel/tests/test_scripts.py | 3 --- 3 files changed, 2 insertions(+), 4 deletions(-) mode change 100755 => 100644 nibabel/cmdline/tests/test_utils.py diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index caa8e385fd..74c100ce48 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -156,7 +156,8 @@ def get_data_diff(files, max_abs=0, max_rel=0): Returns ------- - TODO + OrderedDict + str: absolute and relative differences of each file, given as float """ # we are doomed to keep them in RAM now data = [f if isinstance(f, np.ndarray) else nib.load(f).get_data() for f in files] diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py old mode 100755 new mode 100644 diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py index 537c1df992..a734fbeeda 100755 --- a/nibabel/tests/test_scripts.py +++ b/nibabel/tests/test_scripts.py @@ -77,9 +77,6 @@ def check_nib_diff_examples(): "quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x", "srow_y", "srow_z", "DATA(md5)", "DATA(diff 1:)"] for item in checked_fields: - if item not in stdout: - print(item) - print(stdout) assert_true(item in stdout) fnames2 = [pjoin(DATA_PATH, f) From d057249fbf818c3a742240c1f977458c40c789f1 Mon Sep 17 00:00:00 2001 From: "Christopher P. Cheng" Date: Fri, 28 Sep 2018 13:04:29 -0400 Subject: [PATCH 07/12] undid permission snafu on test_scripts --- nibabel/tests/test_scripts.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 nibabel/tests/test_scripts.py diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py old mode 100755 new mode 100644 From 76ee358c2d67ad4333eb0fead63dbad654efa48b Mon Sep 17 00:00:00 2001 From: "Christopher P. Cheng" Date: Tue, 2 Oct 2018 10:49:06 -0400 Subject: [PATCH 08/12] docstring and function name clarification, change get_data to get_fdata() --- nibabel/cmdline/diff.py | 18 +++++++++++------- nibabel/cmdline/tests/test_utils.py | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index 74c100ce48..13b0781b03 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -51,7 +51,7 @@ def get_opt_parser(): type=float, default=0.0, help="Maximal relative difference in data between files to tolerate." - " If also --data-max-abs-diff specified, only the data points " + " If --data-max-abs-diff is also specified, only the data points " " with absolute difference greater than that value would be " " considered for relative difference check."), ]) @@ -116,7 +116,7 @@ def get_headers_diff(file_headers, names=None): return difference -def get_data_md5_diff(files): +def get_data_hash_diff(files): """Get difference between md5 values of data Parameters @@ -130,7 +130,7 @@ def get_data_md5_diff(files): """ md5sums = [ - hashlib.md5(np.ascontiguousarray(nib.load(f).get_data(), dtype=np.float32)).hexdigest() + hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata())).hexdigest() for f in files ] @@ -156,11 +156,15 @@ def get_data_diff(files, max_abs=0, max_rel=0): Returns ------- - OrderedDict - str: absolute and relative differences of each file, given as float + diffs: OrderedDict + An ordered dict with a record per each file which has differences with other files subsequent detected. + Each record is a list of difference records, one per each file pair. Each difference record is an Ordered + Dict with possible keys 'abs' or 'rel' showing maximal absolute or relative differences in the file + or record ('CMP': 'incompat') if file shapes are incompatible. """ + # we are doomed to keep them in RAM now - data = [f if isinstance(f, np.ndarray) else nib.load(f).get_data() for f in files] + data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata() for f in files] diffs = OrderedDict() for i, d1 in enumerate(data[:-1]): # populate empty entries for non-compared @@ -274,7 +278,7 @@ def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=N diff = get_headers_diff(file_headers, header_fields) - data_md5_diffs = get_data_md5_diff(files) + data_md5_diffs = get_data_hash_diff(files) if data_md5_diffs: # provide details, possibly triggering the ignore of the difference # in data diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py index 50d72a5ade..12f19a1003 100644 --- a/nibabel/cmdline/tests/test_utils.py +++ b/nibabel/cmdline/tests/test_utils.py @@ -11,7 +11,7 @@ import nibabel as nib import numpy as np from nibabel.cmdline.utils import * -from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff, get_data_diff +from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_hash_diff, get_data_diff from os.path import (join as pjoin) from nibabel.testing import data_path from collections import OrderedDict @@ -114,7 +114,7 @@ def test_get_data_diff(): # testing for identical files specifically as md5 may vary by computer test_names = [pjoin(data_path, f) for f in ('standard.nii.gz', 'standard.nii.gz')] - assert_equal(get_data_md5_diff(test_names), []) + assert_equal(get_data_hash_diff(test_names), []) # testing the maximum relative and absolute differences' different use cases test_array = np.arange(16).reshape(4, 4) From 034c2768dd9fd8628f2236c739eeda50e125f69b Mon Sep 17 00:00:00 2001 From: "Christopher P. Cheng" Date: Tue, 2 Oct 2018 11:23:39 -0400 Subject: [PATCH 09/12] corrected styles per Travis, limited fdata to float32 --- nibabel/cmdline/diff.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index 13b0781b03..fe107a7da5 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -130,7 +130,7 @@ def get_data_hash_diff(files): """ md5sums = [ - hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata())).hexdigest() + hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata(dtype=np.float32))).hexdigest() for f in files ] @@ -157,14 +157,17 @@ def get_data_diff(files, max_abs=0, max_rel=0): Returns ------- diffs: OrderedDict - An ordered dict with a record per each file which has differences with other files subsequent detected. - Each record is a list of difference records, one per each file pair. Each difference record is an Ordered - Dict with possible keys 'abs' or 'rel' showing maximal absolute or relative differences in the file - or record ('CMP': 'incompat') if file shapes are incompatible. + An ordered dict with a record per each file which has differences + with other files subsequent detected. Each record is a list of + difference records, one per each file pair. + Each difference record is an Ordered Dict with possible keys + 'abs' or 'rel' showing maximal absolute or relative differences + in the file or the record ('CMP': 'incompat') if file shapes + are incompatible. """ - + # we are doomed to keep them in RAM now - data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata() for f in files] + data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32) for f in files] diffs = OrderedDict() for i, d1 in enumerate(data[:-1]): # populate empty entries for non-compared From 19fcdd5c6337197c81acbdd71416df3eedcfe67e Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 3 Oct 2018 09:30:45 -0400 Subject: [PATCH 10/12] STY: Break overly-long line --- nibabel/cmdline/diff.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index fe107a7da5..1d0afb1ece 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -167,7 +167,8 @@ def get_data_diff(files, max_abs=0, max_rel=0): """ # we are doomed to keep them in RAM now - data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32) for f in files] + data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32) + for f in files] diffs = OrderedDict() for i, d1 in enumerate(data[:-1]): # populate empty entries for non-compared From 93c7bb63d428fcf50578d22e9eac521e75178148 Mon Sep 17 00:00:00 2001 From: "Christopher P. Cheng" Date: Wed, 3 Oct 2018 13:03:18 -0400 Subject: [PATCH 11/12] prepared for future PR to allow modification of dtype used in diff comparison --- nibabel/cmdline/diff.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py index fe107a7da5..fa8d1adb60 100755 --- a/nibabel/cmdline/diff.py +++ b/nibabel/cmdline/diff.py @@ -130,7 +130,7 @@ def get_data_hash_diff(files): """ md5sums = [ - hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata(dtype=np.float32))).hexdigest() + hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata())).hexdigest() for f in files ] @@ -167,7 +167,8 @@ def get_data_diff(files, max_abs=0, max_rel=0): """ # we are doomed to keep them in RAM now - data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32) for f in files] + data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata() + for f in files] diffs = OrderedDict() for i, d1 in enumerate(data[:-1]): # populate empty entries for non-compared From 716b1c6d7b5312aae699a7431d4aa2b78c7a62b0 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Wed, 3 Oct 2018 13:18:12 -0400 Subject: [PATCH 12/12] CI: Update pip/setuptools in AppVeyor --- appveyor.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/appveyor.yml b/appveyor.yml index 3bb9c3d074..e1ecfad43e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -19,6 +19,9 @@ install: # the parent CMD process). - SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% + # Update install environment + - pip install --upgrade pip setuptools + # Install the dependencies of the project. - pip install numpy scipy matplotlib nose h5py mock hypothesis pydicom - pip install .