From cd5a741c17c698194dfd99c9ff1f6e0bb338c6f8 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 14 Sep 2018 16:38:33 -0400
Subject: [PATCH 01/12] RF+ENH: nib-diff - allow to specify absolute and/or
 relative maximal differences to tolerate

It should now be possible to get an idea of how much the data in the given files differ:

    $> nib-diff --ma 0.000001 --mr .001 ./tests-run/output/./sub-1_T1w_5mm_noise_corrected.nii.gz /tmp/sub-1_T1w_5mm_noise_corrected.nii.gz
    These files are different.
    Field          1:sub-1_T1w_5mm_noise_corrected.nii.gz                   2:sub-1_T1w_5mm_noise_corrected.nii.gz
    DATA(md5)      65df09c06b236342eaf7e2fe57aabf55                       3c6e9069e6e054e714f2894419848df0
    DATA(diff 1:)  -                                                      abs: 7.6293945e-06, rel: 0.002224694
---
 nibabel/cmdline/diff.py             | 145 +++++++++++++++++++++++-----
 nibabel/cmdline/tests/test_utils.py |   4 +-
 2 files changed, 123 insertions(+), 26 deletions(-)

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index 21cd7b40a9..fe6bc7bce5 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -39,6 +39,21 @@ def get_opt_parser():
         Option("-H", "--header-fields",
                dest="header_fields", default='all',
                help="Header fields (comma separated) to be printed as well (if present)"),
+
+        Option("--ma", "--data-max-abs-diff",
+               dest="data_max_abs_diff",
+               type=float,
+               default=0.0,
+               help="Maximal absolute difference in data between files to tolerate."),
+
+        Option("--mr", "--data-max-rel-diff",
+               dest="data_max_rel_diff",
+               type=float,
+               default=0.0,
+               help="Maximal relative difference in data between files to tolerate."
+                    " If also --data-max-abs-diff specified, only the data points "
+                    " with absolute difference greater than that value would be "
+                    " considered for relative difference check."),
     ])
 
     return p
@@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None):
     return difference
 
 
-def get_data_diff(files):
-    """Get difference between md5 values
+def get_data_md5_diff(files):
+    """Get difference between md5 values of data
 
         Parameters
         ----------
@@ -125,6 +140,65 @@ def get_data_diff(files):
     return md5sums
 
 
+def get_data_diff(files, max_abs=0, max_rel=0):
+    """Get difference between data
+
+    Parameters
+    ----------
+    max_abs: float, optional
+      Maximal absolute difference to tolerate.
+    max_rel: float, optional
+      Maximal relative (`abs(diff)/mean(diff)`) difference to tolerate.
+      If `max_abs` is specified, then those data points with lesser than that
+      absolute difference, are not considered for relative difference testing
+
+    Returns
+    -------
+    TODO
+    """
+    # we are doomed to keep them in RAM now
+    data = [nib.load(f).get_data() for f in files]
+    diffs = OrderedDict()
+    for i, d1 in enumerate(data[:-1]):
+        # populate empty entries for non-compared
+        diffs1 = [None] * (i+1)
+
+        for j, d2 in enumerate(data[i+1:], i + 1):
+            abs_diff = np.abs(d1 - d2)
+            mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
+            candidates = np.logical_or(mean_abs != 0, abs_diff != 0)
+
+            if max_abs:
+                candidates[abs_diff <= max_abs] = False
+
+            max_abs_diff = np.max(abs_diff)
+            if np.any(candidates):
+                rel_diff = abs_diff[candidates] / mean_abs[candidates]
+                if max_rel:
+                    sub_thr = rel_diff <= max_rel
+                    # Since we operated on sub-selected values already, we need
+                    # to plug them back in
+                    candidates[
+                        tuple((indexes[sub_thr] for indexes in np.where(candidates)))
+                    ] = False
+                max_rel_diff = np.max(rel_diff)
+            else:
+                max_rel_diff = 0
+
+            if np.any(candidates):
+                diff_rec = OrderedDict() # so that abs goes before relative
+                diff_rec['abs'] = max_abs_diff
+                diff_rec['rel'] = max_rel_diff
+                diffs1.append(diff_rec)
+            else:
+                diffs1.append(None)
+
+        if any(diffs1):
+            diffs['DATA(diff %d:)' % (i+1)] = diffs1
+
+    return diffs
+
+
 def display_diff(files, diff):
     """Format header differences into a nice string
 
@@ -145,8 +219,8 @@ def display_diff(files, diff):
     output += "These files are different.\n"
     output += field_width.format('Field')
 
-    for f in files:
-        output += value_width.format(os.path.basename(f))
+    for i, f in enumerate(files, 1):
+        output += "%d:%s" % (i, value_width.format(os.path.basename(f)))
 
     output += "\n"
 
@@ -154,7 +228,12 @@ def display_diff(files, diff):
         output += field_width.format(key)
 
         for item in value:
-            item_str = str(item)
+            if isinstance(item, dict):
+                item_str = ', '.join('%s: %s' % i for i in item.items())
+            elif item is None:
+                item_str = '-'
+            else:
+                item_str = str(item)
             # Value might start/end with some invisible spacing characters so we
             # would "condition" it on both ends a bit
             item_str = re.sub('^[ \t]+', '<', item_str)
@@ -169,8 +248,37 @@ def display_diff(files, diff):
     return output
 
 
+def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None):
+    assert len(files) >= 2, "Please enter at least two files"
+
+    file_headers = [nib.load(f).header for f in files]
+
+    # signals "all fields"
+    if header_fields == 'all':
+        # TODO: header fields might vary across file types, thus prior sensing would be needed
+        header_fields = file_headers[0].keys()
+    else:
+        header_fields = header_fields.split(',')
+
+    diff = get_headers_diff(file_headers, header_fields)
+
+    data_md5_diffs = get_data_md5_diff(files)
+    if data_md5_diffs:
+        # provide details, possibly triggering the ignore of the difference
+        # in data
+        data_diffs = get_data_diff(files,
+                                   max_abs=data_max_abs_diff,
+                                   max_rel=data_max_rel_diff)
+        if data_diffs:
+            diff['DATA(md5)'] = data_md5_diffs
+            diff.update(data_diffs)
+
+    return diff
+
+
 def main(args=None, out=None):
     """Getting the show on the road"""
+
     out = out or sys.stdout
     parser = get_opt_parser()
     (opts, files) = parser.parse_args(args)
@@ -181,27 +289,16 @@ def main(args=None, out=None):
         # suppress nibabel format-compliance warnings
         nib.imageglobals.logger.level = 50
 
-    assert len(files) >= 2, "Please enter at least two files"
-
-    file_headers = [nib.load(f).header for f in files]
-
-    # signals "all fields"
-    if opts.header_fields == 'all':
-        # TODO: header fields might vary across file types, thus prior sensing would be needed
-        header_fields = file_headers[0].keys()
-    else:
-        header_fields = opts.header_fields.split(',')
+    files_diff = diff(
+        files,
+        header_fields=opts.header_fields,
+        data_max_abs_diff=opts.data_max_abs_diff,
+        data_max_rel_diff=opts.data_max_rel_diff
+    )
 
-    diff = get_headers_diff(file_headers, header_fields)
-    data_diff = get_data_diff(files)
-
-    if data_diff:
-        diff['DATA(md5)'] = data_diff
-
-    if diff:
-        out.write(display_diff(files, diff))
+    if files_diff:
+        out.write(display_diff(files, files_diff))
         raise SystemExit(1)
-
     else:
         out.write("These files are identical.\n")
         raise SystemExit(0)
diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py
index 4aa387b6e5..8ee891ed4c 100644
--- a/nibabel/cmdline/tests/test_utils.py
+++ b/nibabel/cmdline/tests/test_utils.py
@@ -11,7 +11,7 @@
 import nibabel as nib
 import numpy as np
 from nibabel.cmdline.utils import *
-from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_diff
+from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff
 from os.path import (join as pjoin)
 from nibabel.testing import data_path
 from collections import OrderedDict
@@ -114,7 +114,7 @@ def test_get_data_diff():
     #  testing for identical files specifically as md5 may vary by computer
     test_names = [pjoin(data_path, f)
                   for f in ('standard.nii.gz', 'standard.nii.gz')]
-    assert_equal(get_data_diff(test_names), [])
+    assert_equal(get_data_md5_diff(test_names), [])
 
 
 def test_main():

From 018eceb71a7708d866af080dfafc1936ae8304ae Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 14 Sep 2018 16:41:42 -0400
Subject: [PATCH 02/12] ENH: nib-diff Field/File not just Field in the header

---
 nibabel/cmdline/diff.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index fe6bc7bce5..b879e327ef 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -217,7 +217,7 @@ def display_diff(files, diff):
     value_width = "{:<55}"
 
     output += "These files are different.\n"
-    output += field_width.format('Field')
+    output += field_width.format('Field/File')
 
     for i, f in enumerate(files, 1):
         output += "%d:%s" % (i, value_width.format(os.path.basename(f)))

From 833b4dfccddf8d8ff41f457c7cff92b0b72a5722 Mon Sep 17 00:00:00 2001
From: "Christopher P. Cheng" <forever.21@dartmouth.edu>
Date: Fri, 21 Sep 2018 13:37:57 -0400
Subject: [PATCH 03/12] changed as requested in the pull request comments

---
 nibabel/cmdline/diff.py             | 147 +++++++++++++++++++++++-----
 nibabel/cmdline/tests/test_utils.py |   8 +-
 nibabel/tests/test_scripts.py       |   2 +-
 3 files changed, 127 insertions(+), 30 deletions(-)
 mode change 100644 => 100755 nibabel/cmdline/tests/test_utils.py
 mode change 100644 => 100755 nibabel/tests/test_scripts.py

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index 21cd7b40a9..364b3fe0da 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -39,6 +39,21 @@ def get_opt_parser():
         Option("-H", "--header-fields",
                dest="header_fields", default='all',
                help="Header fields (comma separated) to be printed as well (if present)"),
+
+        Option("--ma", "--data-max-abs-diff",
+               dest="data_max_abs_diff",
+               type=float,
+               default=0.0,
+               help="Maximal absolute difference in data between files to tolerate."),
+
+        Option("--mr", "--data-max-rel-diff",
+               dest="data_max_rel_diff",
+               type=float,
+               default=0.0,
+               help="Maximal relative difference in data between files to tolerate."
+                    " If also --data-max-abs-diff specified, only the data points "
+                    " with absolute difference greater than that value would be "
+                    " considered for relative difference check."),
     ])
 
     return p
@@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None):
     return difference
 
 
-def get_data_diff(files):
-    """Get difference between md5 values
+def get_data_md5_diff(files):
+    """Get difference between md5 values of data
 
         Parameters
         ----------
@@ -125,6 +140,65 @@ def get_data_diff(files):
     return md5sums
 
 
+def get_data_diff(files, max_abs=0, max_rel=0):
+    """Get difference between data
+
+    Parameters
+    ----------
+    max_abs: float, optional
+      Maximal absolute difference to tolerate.
+    max_rel: float, optional
+      Maximal relative (`abs(diff)/mean(diff)`) difference to tolerate.
+      If `max_abs` is specified, then those data points with lesser than that
+      absolute difference, are not considered for relative difference testing
+
+    Returns
+    -------
+    TODO
+    """
+    # we are doomed to keep them in RAM now
+    data = [nib.load(f).get_data() for f in files]
+    diffs = OrderedDict()
+    for i, d1 in enumerate(data[:-1]):
+        # populate empty entries for non-compared
+        diffs1 = [None] * (i + 1)
+
+        for j, d2 in enumerate(data[i + 1:], i + 1):
+            abs_diff = np.abs(d1 - d2)
+            mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
+            candidates = np.logical_or(mean_abs != 0, abs_diff != 0)
+
+            if max_abs:
+                candidates[abs_diff <= max_abs] = False
+
+            max_abs_diff = np.max(abs_diff)
+            if np.any(candidates):
+                rel_diff = abs_diff[candidates] / mean_abs[candidates]
+                if max_rel:
+                    sub_thr = rel_diff <= max_rel
+                    # Since we operated on sub-selected values already, we need
+                    # to plug them back in
+                    candidates[
+                        tuple((indexes[sub_thr] for indexes in np.where(candidates)))
+                    ] = False
+                max_rel_diff = np.max(rel_diff)
+            else:
+                max_rel_diff = 0
+
+            if np.any(candidates):
+                diff_rec = OrderedDict()  # so that abs goes before relative
+                diff_rec['abs'] = max_abs_diff
+                diff_rec['rel'] = max_rel_diff
+                diffs1.append(diff_rec)
+            else:
+                diffs1.append(None)
+
+        if any(diffs1):
+            diffs['DATA(diff %d:)' % (i + 1)] = diffs1
+
+    return diffs
+
+
 def display_diff(files, diff):
     """Format header differences into a nice string
 
@@ -143,10 +217,10 @@ def display_diff(files, diff):
     value_width = "{:<55}"
 
     output += "These files are different.\n"
-    output += field_width.format('Field')
+    output += field_width.format('Field/File')
 
-    for f in files:
-        output += value_width.format(os.path.basename(f))
+    for i, f in enumerate(files, 1):
+        output += "%d:%s" % (i, value_width.format(os.path.basename(f)))
 
     output += "\n"
 
@@ -154,7 +228,12 @@ def display_diff(files, diff):
         output += field_width.format(key)
 
         for item in value:
-            item_str = str(item)
+            if isinstance(item, dict):
+                item_str = ', '.join('%s: %s' % i for i in item.items())
+            elif item is None:
+                item_str = '-'
+            else:
+                item_str = str(item)
             # Value might start/end with some invisible spacing characters so we
             # would "condition" it on both ends a bit
             item_str = re.sub('^[ \t]+', '<', item_str)
@@ -169,8 +248,37 @@ def display_diff(files, diff):
     return output
 
 
+def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None):
+    assert len(files) >= 2, "Please enter at least two files"
+
+    file_headers = [nib.load(f).header for f in files]
+
+    # signals "all fields"
+    if header_fields == 'all':
+        # TODO: header fields might vary across file types, thus prior sensing would be needed
+        header_fields = file_headers[0].keys()
+    else:
+        header_fields = header_fields.split(',')
+
+    diff = get_headers_diff(file_headers, header_fields)
+
+    data_md5_diffs = get_data_md5_diff(files)
+    if data_md5_diffs:
+        # provide details, possibly triggering the ignore of the difference
+        # in data
+        data_diffs = get_data_diff(files,
+                                   max_abs=data_max_abs_diff,
+                                   max_rel=data_max_rel_diff)
+        if data_diffs:
+            diff['DATA(md5)'] = data_md5_diffs
+            diff.update(data_diffs)
+
+    return diff
+
+
 def main(args=None, out=None):
     """Getting the show on the road"""
+
     out = out or sys.stdout
     parser = get_opt_parser()
     (opts, files) = parser.parse_args(args)
@@ -181,27 +289,16 @@ def main(args=None, out=None):
         # suppress nibabel format-compliance warnings
         nib.imageglobals.logger.level = 50
 
-    assert len(files) >= 2, "Please enter at least two files"
-
-    file_headers = [nib.load(f).header for f in files]
-
-    # signals "all fields"
-    if opts.header_fields == 'all':
-        # TODO: header fields might vary across file types, thus prior sensing would be needed
-        header_fields = file_headers[0].keys()
-    else:
-        header_fields = opts.header_fields.split(',')
+    files_diff = diff(
+        files,
+        header_fields=opts.header_fields,
+        data_max_abs_diff=opts.data_max_abs_diff,
+        data_max_rel_diff=opts.data_max_rel_diff
+    )
 
-    diff = get_headers_diff(file_headers, header_fields)
-    data_diff = get_data_diff(files)
-
-    if data_diff:
-        diff['DATA(md5)'] = data_diff
-
-    if diff:
-        out.write(display_diff(files, diff))
+    if files_diff:
+        out.write(display_diff(files, files_diff))
         raise SystemExit(1)
-
     else:
         out.write("These files are identical.\n")
         raise SystemExit(0)
diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py
old mode 100644
new mode 100755
index 4aa387b6e5..45c3d32cc6
--- a/nibabel/cmdline/tests/test_utils.py
+++ b/nibabel/cmdline/tests/test_utils.py
@@ -11,7 +11,7 @@
 import nibabel as nib
 import numpy as np
 from nibabel.cmdline.utils import *
-from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_diff
+from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff
 from os.path import (join as pjoin)
 from nibabel.testing import data_path
 from collections import OrderedDict
@@ -96,8 +96,8 @@ def test_display_diff():
         ("bitpix", [np.array(8).astype(dtype="uint8"), np.array(16).astype(dtype="uint8")])
     ])
 
-    expected_output = "These files are different.\n" + "Field          hellokitty.nii.gz" \
-                                                       "                                      " \
+    expected_output = "These files are different.\n" + "Field/File          hellokitty.nii.gz" \
+                                                       "                                 " \
                                                        "privettovarish.nii.gz                                  \n" \
                                                        "datatype       " \
                                                        "2                                                      " \
@@ -114,7 +114,7 @@ def test_get_data_diff():
     #  testing for identical files specifically as md5 may vary by computer
     test_names = [pjoin(data_path, f)
                   for f in ('standard.nii.gz', 'standard.nii.gz')]
-    assert_equal(get_data_diff(test_names), [])
+    assert_equal(get_data_md5_diff(test_names), [])
 
 
 def test_main():
diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py
old mode 100644
new mode 100755
index 0aa404a939..6aba6c4038
--- a/nibabel/tests/test_scripts.py
+++ b/nibabel/tests/test_scripts.py
@@ -72,7 +72,7 @@ def check_nib_diff_examples():
     fnames = [pjoin(DATA_PATH, f)
                for f in ('standard.nii.gz', 'example4d.nii.gz')]
     code, stdout, stderr = run_command(['nib-diff'] + fnames, check_code=False)
-    checked_fields = ["Field", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end",
+    checked_fields = ["Field/File", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end",
                       "xyzt_units", "cal_max", "descrip", "qform_code", "sform_code", "quatern_b",
                       "quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x",
                       "srow_y", "srow_z", "DATA(md5)"]

From 1e33ea7db4ff54f06b38bfcfbf6d988f82604bc6 Mon Sep 17 00:00:00 2001
From: "Christopher P. Cheng" <forever.21@dartmouth.edu>
Date: Tue, 25 Sep 2018 11:01:28 -0400
Subject: [PATCH 04/12] RF: anticipated files of different shapes, fixed table
 display, corrected tests

---
 nibabel/cmdline/diff.py             | 64 +++++++++++++++--------------
 nibabel/cmdline/tests/test_utils.py |  6 +--
 nibabel/tests/test_scripts.py       |  5 ++-
 3 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index 171b5ae8c3..edfbb0c5d0 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -165,42 +165,45 @@ def get_data_diff(files, max_abs=0, max_rel=0):
 
         for j, d2 in enumerate(data[i + 1:], i + 1):
 
-            abs_diff = np.abs(d1 - d2)
-            mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
-            candidates = np.logical_or(mean_abs != 0, abs_diff != 0)
-
-            if max_abs:
-                candidates[abs_diff <= max_abs] = False
-
-            max_abs_diff = np.max(abs_diff)
-            if np.any(candidates):
-                rel_diff = abs_diff[candidates] / mean_abs[candidates]
-                if max_rel:
-                    sub_thr = rel_diff <= max_rel
-                    # Since we operated on sub-selected values already, we need
-                    # to plug them back in
-                    candidates[
-                        tuple((indexes[sub_thr] for indexes in np.where(candidates)))
-                    ] = False
-                max_rel_diff = np.max(rel_diff)
-            else:
-                max_rel_diff = 0
-
-            if np.any(candidates):
+            if d1.shape == d2.shape:
+                abs_diff = np.abs(d1 - d2)
+                mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
+                candidates = np.logical_or(mean_abs != 0, abs_diff != 0)
+
+                if max_abs:
+                    candidates[abs_diff <= max_abs] = False
+
+                max_abs_diff = np.max(abs_diff)
+                if np.any(candidates):
+                    rel_diff = abs_diff[candidates] / mean_abs[candidates]
+                    if max_rel:
+                        sub_thr = rel_diff <= max_rel
+                        # Since we operated on sub-selected values already, we need
+                        # to plug them back in
+                        candidates[
+                            tuple((indexes[sub_thr] for indexes in np.where(candidates)))
+                        ] = False
+                    max_rel_diff = np.max(rel_diff)
+                else:
+                    max_rel_diff = 0
+
+                if np.any(candidates):
+
+                    diff_rec = OrderedDict()  # so that abs goes before relative
+
+                    diff_rec['abs'] = max_abs_diff
+                    diff_rec['rel'] = max_rel_diff
+                    diffs1.append(diff_rec)
+                else:
+                    diffs1.append(None)
 
-                diff_rec = OrderedDict()  # so that abs goes before relative
-
-                diff_rec['abs'] = max_abs_diff
-                diff_rec['rel'] = max_rel_diff
-                diffs1.append(diff_rec)
             else:
-                diffs1.append(None)
+                diffs1.append({'CMP': "incompat"})
 
         if any(diffs1):
 
             diffs['DATA(diff %d:)' % (i + 1)] = diffs1
 
-
     return diffs
 
 
@@ -219,13 +222,14 @@ def display_diff(files, diff):
     """
     output = ""
     field_width = "{:<15}"
+    filename_width = "{:<53}"
     value_width = "{:<55}"
 
     output += "These files are different.\n"
     output += field_width.format('Field/File')
 
     for i, f in enumerate(files, 1):
-        output += "%d:%s" % (i, value_width.format(os.path.basename(f)))
+        output += "%d:%s" % (i, filename_width.format(os.path.basename(f)))
 
     output += "\n"
 
diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py
index 45c3d32cc6..6d4fec2344 100755
--- a/nibabel/cmdline/tests/test_utils.py
+++ b/nibabel/cmdline/tests/test_utils.py
@@ -96,9 +96,9 @@ def test_display_diff():
         ("bitpix", [np.array(8).astype(dtype="uint8"), np.array(16).astype(dtype="uint8")])
     ])
 
-    expected_output = "These files are different.\n" + "Field/File          hellokitty.nii.gz" \
-                                                       "                                 " \
-                                                       "privettovarish.nii.gz                                  \n" \
+    expected_output = "These files are different.\n" + "Field/File     1:hellokitty.nii.gz" \
+                                                       "                                    " \
+                                                       "2:privettovarish.nii.gz                                \n" \
                                                        "datatype       " \
                                                        "2                                                      " \
                                                        "4                                                      \n" \
diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py
index 6aba6c4038..537c1df992 100755
--- a/nibabel/tests/test_scripts.py
+++ b/nibabel/tests/test_scripts.py
@@ -75,8 +75,11 @@ def check_nib_diff_examples():
     checked_fields = ["Field/File", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end",
                       "xyzt_units", "cal_max", "descrip", "qform_code", "sform_code", "quatern_b",
                       "quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x",
-                      "srow_y", "srow_z", "DATA(md5)"]
+                      "srow_y", "srow_z", "DATA(md5)", "DATA(diff 1:)"]
     for item in checked_fields:
+        if item not in stdout:
+            print(item)
+            print(stdout)
         assert_true(item in stdout)
 
     fnames2 = [pjoin(DATA_PATH, f)

From 76ca32f9ecb9110c67efd425c33c3ed7232e257e Mon Sep 17 00:00:00 2001
From: "Christopher P. Cheng" <forever.21@dartmouth.edu>
Date: Thu, 27 Sep 2018 12:14:41 -0400
Subject: [PATCH 05/12] elaborated docstring, modified get_data_diff to allow
 direct array input, added tests for coverage

---
 nibabel/cmdline/diff.py             |  4 +++-
 nibabel/cmdline/tests/test_utils.py | 32 ++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index edfbb0c5d0..caa8e385fd 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -145,6 +145,8 @@ def get_data_diff(files, max_abs=0, max_rel=0):
 
     Parameters
     ----------
+    files: list of (str or ndarray)
+      If list of strings is provided -- they must be existing file names
     max_abs: float, optional
       Maximal absolute difference to tolerate.
     max_rel: float, optional
@@ -157,7 +159,7 @@ def get_data_diff(files, max_abs=0, max_rel=0):
     TODO
     """
     # we are doomed to keep them in RAM now
-    data = [nib.load(f).get_data() for f in files]
+    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_data() for f in files]
     diffs = OrderedDict()
     for i, d1 in enumerate(data[:-1]):
         # populate empty entries for non-compared
diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py
index 6d4fec2344..50d72a5ade 100755
--- a/nibabel/cmdline/tests/test_utils.py
+++ b/nibabel/cmdline/tests/test_utils.py
@@ -11,7 +11,7 @@
 import nibabel as nib
 import numpy as np
 from nibabel.cmdline.utils import *
-from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff
+from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff, get_data_diff
 from os.path import (join as pjoin)
 from nibabel.testing import data_path
 from collections import OrderedDict
@@ -116,6 +116,36 @@ def test_get_data_diff():
                   for f in ('standard.nii.gz', 'standard.nii.gz')]
     assert_equal(get_data_md5_diff(test_names), [])
 
+    #  testing the maximum relative and absolute differences' different use cases
+    test_array = np.arange(16).reshape(4, 4)
+    test_array_2 = np.arange(1, 17).reshape(4, 4)
+    test_array_3 = np.arange(2, 18).reshape(4, 4)
+    test_array_4 = np.arange(100).reshape(10, 10)
+    test_array_5 = np.arange(64).reshape(8, 8)
+
+    # same shape, 2 files
+    assert_equal(get_data_diff([test_array, test_array_2]),
+                 OrderedDict([('DATA(diff 1:)', [None, OrderedDict([('abs', 1), ('rel', 2.0)])])]))
+
+    # same shape, 3 files
+    assert_equal(get_data_diff([test_array, test_array_2, test_array_3]),
+                 OrderedDict([('DATA(diff 1:)', [None, OrderedDict([('abs', 1), ('rel', 2.0)]),
+                                                 OrderedDict([('abs', 2), ('rel', 2.0)])]),
+                              ('DATA(diff 2:)', [None, None,
+                                                 OrderedDict([('abs', 1), ('rel', 0.66666666666666663)])])]))
+
+    # same shape, 2 files, modified maximum abs/rel
+    assert_equal(get_data_diff([test_array, test_array_2], max_abs=2, max_rel=2), OrderedDict())
+
+    # different shape, 2 files
+    assert_equal(get_data_diff([test_array_2, test_array_4]),
+                 OrderedDict([('DATA(diff 1:)', [None, {'CMP': 'incompat'}])]))
+
+    # different shape, 3 files
+    assert_equal(get_data_diff([test_array_4, test_array_5, test_array_2]),
+                 OrderedDict([('DATA(diff 1:)', [None, {'CMP': 'incompat'}, {'CMP': 'incompat'}]),
+                              ('DATA(diff 2:)', [None, None, {'CMP': 'incompat'}])]))
+
 
 def test_main():
     test_names = [pjoin(data_path, f)

From 0aa63705819a6af1a843bf57ffc00e906e0260fd Mon Sep 17 00:00:00 2001
From: "Christopher P. Cheng" <forever.21@dartmouth.edu>
Date: Fri, 28 Sep 2018 12:59:21 -0400
Subject: [PATCH 06/12] added to diff documentation, undid executable change,
 took out debugging script in test_scripts

---
 nibabel/cmdline/diff.py             | 3 ++-
 nibabel/cmdline/tests/test_utils.py | 0
 nibabel/tests/test_scripts.py       | 3 ---
 3 files changed, 2 insertions(+), 4 deletions(-)
 mode change 100755 => 100644 nibabel/cmdline/tests/test_utils.py

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index caa8e385fd..74c100ce48 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -156,7 +156,8 @@ def get_data_diff(files, max_abs=0, max_rel=0):
 
     Returns
     -------
-    TODO
+    OrderedDict
+        str: absolute and relative differences of each file, given as float
     """
     # we are doomed to keep them in RAM now
     data = [f if isinstance(f, np.ndarray) else nib.load(f).get_data() for f in files]
diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py
old mode 100755
new mode 100644
diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py
index 537c1df992..a734fbeeda 100755
--- a/nibabel/tests/test_scripts.py
+++ b/nibabel/tests/test_scripts.py
@@ -77,9 +77,6 @@ def check_nib_diff_examples():
                       "quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x",
                       "srow_y", "srow_z", "DATA(md5)", "DATA(diff 1:)"]
     for item in checked_fields:
-        if item not in stdout:
-            print(item)
-            print(stdout)
         assert_true(item in stdout)
 
     fnames2 = [pjoin(DATA_PATH, f)

From d057249fbf818c3a742240c1f977458c40c789f1 Mon Sep 17 00:00:00 2001
From: "Christopher P. Cheng" <forever.21@dartmouth.edu>
Date: Fri, 28 Sep 2018 13:04:29 -0400
Subject: [PATCH 07/12] undid permission snafu on test_scripts

---
 nibabel/tests/test_scripts.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 nibabel/tests/test_scripts.py

diff --git a/nibabel/tests/test_scripts.py b/nibabel/tests/test_scripts.py
old mode 100755
new mode 100644

From 76ee358c2d67ad4333eb0fead63dbad654efa48b Mon Sep 17 00:00:00 2001
From: "Christopher P. Cheng" <forever.21@dartmouth.edu>
Date: Tue, 2 Oct 2018 10:49:06 -0400
Subject: [PATCH 08/12] docstring and function name clarification, change
 get_data to get_fdata()

---
 nibabel/cmdline/diff.py             | 18 +++++++++++-------
 nibabel/cmdline/tests/test_utils.py |  4 ++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index 74c100ce48..13b0781b03 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -51,7 +51,7 @@ def get_opt_parser():
                type=float,
                default=0.0,
                help="Maximal relative difference in data between files to tolerate."
-                    " If also --data-max-abs-diff specified, only the data points "
+                    " If --data-max-abs-diff is also specified, only the data points "
                     " with absolute difference greater than that value would be "
                     " considered for relative difference check."),
     ])
@@ -116,7 +116,7 @@ def get_headers_diff(file_headers, names=None):
     return difference
 
 
-def get_data_md5_diff(files):
+def get_data_hash_diff(files):
     """Get difference between md5 values of data
 
         Parameters
@@ -130,7 +130,7 @@ def get_data_md5_diff(files):
         """
 
     md5sums = [
-        hashlib.md5(np.ascontiguousarray(nib.load(f).get_data(), dtype=np.float32)).hexdigest()
+        hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata())).hexdigest()
         for f in files
     ]
 
@@ -156,11 +156,15 @@ def get_data_diff(files, max_abs=0, max_rel=0):
 
     Returns
     -------
-    OrderedDict
-        str: absolute and relative differences of each file, given as float
+    diffs: OrderedDict
+        An ordered dict with a record per each file which has differences with other files subsequent detected.
+        Each record is a list of difference records, one per each file pair. Each difference record is an Ordered
+        Dict with possible keys 'abs' or 'rel' showing maximal absolute or relative differences in the file 
+        or record ('CMP': 'incompat') if file shapes are incompatible.
     """
+    
     # we are doomed to keep them in RAM now
-    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_data() for f in files]
+    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata() for f in files]
     diffs = OrderedDict()
     for i, d1 in enumerate(data[:-1]):
         # populate empty entries for non-compared
@@ -274,7 +278,7 @@ def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=N
 
     diff = get_headers_diff(file_headers, header_fields)
 
-    data_md5_diffs = get_data_md5_diff(files)
+    data_md5_diffs = get_data_hash_diff(files)
     if data_md5_diffs:
         # provide details, possibly triggering the ignore of the difference
         # in data
diff --git a/nibabel/cmdline/tests/test_utils.py b/nibabel/cmdline/tests/test_utils.py
index 50d72a5ade..12f19a1003 100644
--- a/nibabel/cmdline/tests/test_utils.py
+++ b/nibabel/cmdline/tests/test_utils.py
@@ -11,7 +11,7 @@
 import nibabel as nib
 import numpy as np
 from nibabel.cmdline.utils import *
-from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_md5_diff, get_data_diff
+from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_hash_diff, get_data_diff
 from os.path import (join as pjoin)
 from nibabel.testing import data_path
 from collections import OrderedDict
@@ -114,7 +114,7 @@ def test_get_data_diff():
     #  testing for identical files specifically as md5 may vary by computer
     test_names = [pjoin(data_path, f)
                   for f in ('standard.nii.gz', 'standard.nii.gz')]
-    assert_equal(get_data_md5_diff(test_names), [])
+    assert_equal(get_data_hash_diff(test_names), [])
 
     #  testing the maximum relative and absolute differences' different use cases
     test_array = np.arange(16).reshape(4, 4)

From 034c2768dd9fd8628f2236c739eeda50e125f69b Mon Sep 17 00:00:00 2001
From: "Christopher P. Cheng" <forever.21@dartmouth.edu>
Date: Tue, 2 Oct 2018 11:23:39 -0400
Subject: [PATCH 09/12] corrected styles per Travis, limited fdata to float32

---
 nibabel/cmdline/diff.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index 13b0781b03..fe107a7da5 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -130,7 +130,7 @@ def get_data_hash_diff(files):
         """
 
     md5sums = [
-        hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata())).hexdigest()
+        hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata(dtype=np.float32))).hexdigest()
         for f in files
     ]
 
@@ -157,14 +157,17 @@ def get_data_diff(files, max_abs=0, max_rel=0):
     Returns
     -------
     diffs: OrderedDict
-        An ordered dict with a record per each file which has differences with other files subsequent detected.
-        Each record is a list of difference records, one per each file pair. Each difference record is an Ordered
-        Dict with possible keys 'abs' or 'rel' showing maximal absolute or relative differences in the file 
-        or record ('CMP': 'incompat') if file shapes are incompatible.
+        An ordered dict with one record for each file that differs from
+        at least one subsequent file. Each record is a list of
+        difference records, one per file pair.
+        Each difference record is an OrderedDict with possible keys
+        'abs' and/or 'rel' giving the maximal absolute or relative
+        difference between the two files, or the record
+        {'CMP': 'incompat'} if the file shapes are incompatible.
     """
-    
+
     # we are doomed to keep them in RAM now
-    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata() for f in files]
+    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32) for f in files]
     diffs = OrderedDict()
     for i, d1 in enumerate(data[:-1]):
         # populate empty entries for non-compared

From 19fcdd5c6337197c81acbdd71416df3eedcfe67e Mon Sep 17 00:00:00 2001
From: Chris Markiewicz <effigies@gmail.com>
Date: Wed, 3 Oct 2018 09:30:45 -0400
Subject: [PATCH 10/12] STY: Break overly-long line

---
 nibabel/cmdline/diff.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index fe107a7da5..1d0afb1ece 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -167,7 +167,8 @@ def get_data_diff(files, max_abs=0, max_rel=0):
     """
 
     # we are doomed to keep them in RAM now
-    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32) for f in files]
+    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32)
+            for f in files]
     diffs = OrderedDict()
     for i, d1 in enumerate(data[:-1]):
         # populate empty entries for non-compared

From 93c7bb63d428fcf50578d22e9eac521e75178148 Mon Sep 17 00:00:00 2001
From: "Christopher P. Cheng" <forever.21@dartmouth.edu>
Date: Wed, 3 Oct 2018 13:03:18 -0400
Subject: [PATCH 11/12] prepared for future PR to allow modification of dtype
 used in diff comparison

---
 nibabel/cmdline/diff.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nibabel/cmdline/diff.py b/nibabel/cmdline/diff.py
index fe107a7da5..fa8d1adb60 100755
--- a/nibabel/cmdline/diff.py
+++ b/nibabel/cmdline/diff.py
@@ -130,7 +130,7 @@ def get_data_hash_diff(files):
         """
 
     md5sums = [
-        hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata(dtype=np.float32))).hexdigest()
+        hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata())).hexdigest()
         for f in files
     ]
 
@@ -167,7 +167,8 @@ def get_data_diff(files, max_abs=0, max_rel=0):
     """
 
     # we are doomed to keep them in RAM now
-    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32) for f in files]
+    data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata()
+            for f in files]
     diffs = OrderedDict()
     for i, d1 in enumerate(data[:-1]):
         # populate empty entries for non-compared

From 716b1c6d7b5312aae699a7431d4aa2b78c7a62b0 Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz" <markiewicz@stanford.edu>
Date: Wed, 3 Oct 2018 13:18:12 -0400
Subject: [PATCH 12/12] CI: Update pip/setuptools in AppVeyor

---
 appveyor.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/appveyor.yml b/appveyor.yml
index 3bb9c3d074..e1ecfad43e 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -19,6 +19,9 @@ install:
   # the parent CMD process).
   - SET PATH=%PYTHON%;%PYTHON%\Scripts;%PATH%
 
+  # Update install environment
+  - pip install --upgrade pip setuptools
+
   # Install the dependencies of the project.
   - pip install numpy scipy matplotlib nose h5py mock hypothesis pydicom
   - pip install .