-
Notifications
You must be signed in to change notification settings - Fork 264
NF nib-diff #617
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NF nib-diff #617
Changes from 67 commits
328d3bb
d293a20
5491af4
949762c
93b5e09
22804f1
f81a78b
fe9c052
5eb4477
e2defb0
5e3a767
7febf65
23a43ba
fae491d
3e87d81
a3b35d9
1491c61
397bc03
f192f65
92553a2
7a70d56
f5e930d
df82a51
6d706f5
774ce3b
911d781
0458694
fed70e9
0b59dfb
2920abf
1e57409
fd6c474
497ad2a
df0aa79
c23143c
db16d85
feca439
acf667b
bb3fbf0
8a92010
a9a572a
3290a66
92e4ed0
06e8dd7
8fd6995
df8bc04
45d3fbf
1cbf5b3
45bdf64
c600746
e26adb5
3802919
0ce86df
72bc800
0cf2a8c
5db2654
50a480e
9e155df
3c0c90c
f8c32b8
51733b0
41caade
f1cee5f
676ac70
f476c48
10c2c42
7989563
ae74339
82b1457
45d0edf
c1f553f
2f89242
6613522
a311d7b
2cd69b5
414da00
59006b0
672661e
baf6cdc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!python | ||
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- | ||
# vi: set ft=python sts=4 ts=4 sw=4 et: | ||
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## | ||
# | ||
# See COPYING file distributed along with the NiBabel package for the | ||
# copyright and license terms. | ||
# | ||
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## | ||
""" | ||
Quick diff summary for a set of neuroimaging files | ||
""" | ||
|
||
from nibabel.cmdline.diff import main | ||
|
||
if __name__ == '__main__': | ||
main() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ | |
-r requirements.txt | ||
nose | ||
mock | ||
hypothesis |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,298 @@ | ||
#!python | ||
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- | ||
# vi: set ft=python sts=4 ts=4 sw=4 et: | ||
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## | ||
# | ||
# See COPYING file distributed along with the NiBabel package for the | ||
# copyright and license terms. | ||
# | ||
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## | ||
""" | ||
Quick summary of the differences among a set of neuroimaging files | ||
""" | ||
from __future__ import division, print_function, absolute_import | ||
|
||
import re | ||
import sys | ||
from collections import OrderedDict | ||
from optparse import OptionParser, Option | ||
|
||
import numpy as np | ||
|
||
import nibabel as nib | ||
import nibabel.cmdline.utils | ||
import hashlib | ||
|
||
|
||
def get_opt_parser():
    """Build the command-line option parser for this tool.

    The usage text is the invoking program name (``sys.argv[0]``) followed
    by this module's docstring; the version string is taken from
    ``nib.__version__``.

    Returns
    -------
    OptionParser
        parser accepting ``-v/--verbose`` (counted, default 0) and
        ``-H/--header-fields`` (default ``'all'``)
    """
    # the module docstring doubles as the help text
    usage_text = "%s [OPTIONS] [FILE ...]\n\n" % sys.argv[0] + __doc__
    parser = OptionParser(usage=usage_text,
                          version="%prog " + nib.__version__)

    verbosity_option = Option(
        "-v", "--verbose", action="count",
        dest="verbose", default=0,
        help="Make more noise. Could be specified multiple times")

    fields_option = Option(
        "-H", "--header-fields",
        dest="header_fields", default='all',
        help="Header fields (comma separated) to be printed as well (if present)")

    parser.add_options([verbosity_option, fields_option])
    return parser
|
||
|
||
def diff_values(first_item, second_item):
    """Generically compare two values, return True if they differ.

    Parameters
    ----------
    first_item, second_item : object
        values to compare; scalars, strings, lists and ``np.ndarray``
        instances are all accepted

    Returns
    -------
    bool
        True if the two values are of different types, cannot be compared
        element-wise (e.g. arrays of incompatible shapes), or differ in at
        least one element; False otherwise.  Always a plain ``bool``, never
        an array.
    """
    # A difference in type is itself a difference (e.g. 1 vs 1.0), so check
    # it first; this also avoids comparing values of unrelated types.
    # Exact type identity is intended here, hence no isinstance().
    if type(first_item) is not type(second_item):
        return True

    try:
        # np.any collapses element-wise array comparisons to one truth value
        # and passes scalar comparison results straight through
        return bool(np.any(first_item != second_item))
    except ValueError:
        # recent NumPy raises when the operands cannot be broadcast against
        # each other -- values of incompatible shapes are different
        return True
|
||
|
||
def diff_headers(files, fields): | ||
"""Iterates over all header fields of all files to find those that differ | ||
|
||
Parameters | ||
---------- | ||
files: a given list of files to be compared | ||
fields: the fields to be compared | ||
|
||
Returns | ||
------- | ||
list | ||
header fields whose values differ across files | ||
""" | ||
|
||
headers = [] | ||
|
||
for f in range(len(files)): # for each file | ||
for h in fields: # for each header | ||
|
||
# each maneuver is encased in a try block after exceptions have previously occurred | ||
# get the particular header field within the particular file | ||
|
||
try: | ||
field = files[f][h] | ||
|
||
except ValueError: | ||
continue | ||
|
||
# filter numpy arrays with a NaN value | ||
try: | ||
if np.all(np.isnan(field)): | ||
continue | ||
|
||
except TypeError: | ||
pass | ||
|
||
# compare current file with other files | ||
for i in files[f + 1:]: | ||
other_field = i[h] | ||
|
||
# sometimes field.item doesn't work | ||
try: | ||
# converting bytes to be compared as strings | ||
if isinstance(field.item(0), bytes): | ||
field = field.item(0).decode("utf-8") | ||
|
||
# converting np.ndarray to lists to remove ambiguity | ||
if isinstance(field, np.ndarray): | ||
field = field.tolist() | ||
|
||
if isinstance(other_field.item(0), bytes): | ||
other_field = other_field.item(0).decode("utf-8") | ||
if isinstance(other_field, np.ndarray): | ||
other_field = other_field.tolist() | ||
|
||
except AttributeError: | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why `continue` here? If one file is missing something that the others have, then they differ. |
||
|
||
# if the header values of the two files are different, append | ||
if diff_values(field, other_field): | ||
headers.append(h) | ||
|
||
if headers: # return a list of headers for the files whose values differ | ||
return headers | ||
|
||
|
||
def diff_header_fields(header_field, files): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since it is just a single Also you also have adjust also docstring to correspond. e.g.
In PyCharm you could easily rename functions using Refactor -> Rename function. But then I also spotted
Why not to make |
||
"""Iterates over a single header field of multiple files | ||
|
||
Parameters | ||
---------- | ||
header_field: a given header field | ||
files: the files to be compared | ||
|
||
Returns | ||
------- | ||
list | ||
str for each value corresponding to each file's given header field | ||
""" | ||
|
||
keyed_inputs = [] | ||
|
||
for i in files: | ||
|
||
# each maneuver is encased in a try block after exceptions have previously occurred | ||
# get the particular header field within the particular file | ||
|
||
try: | ||
field_value = i[header_field] | ||
except ValueError: | ||
continue | ||
|
||
# compare different data types, return all values as soon as diff is found | ||
for x in files[1:]: | ||
try: | ||
data_diff = diff_values(str(x[header_field].dtype), str(field_value.dtype)) | ||
|
||
if data_diff: | ||
break | ||
except ValueError: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have no idea why a ValueError could/should happen here (there is no comment explaining it), but I do not think it should be treated as "there is no difference". |
||
continue | ||
|
||
# string formatting of responses | ||
try: | ||
|
||
# if differences are found among data types | ||
if data_diff: | ||
# accounting for how to arrange arrays | ||
if field_value.ndim < 1: | ||
keyed_inputs.append("{}@{}".format(field_value, field_value.dtype)) | ||
elif field_value.ndim == 1: | ||
keyed_inputs.append("{}@{}".format(list(field_value), field_value.dtype)) | ||
|
||
# if no differences are found among data types | ||
else: | ||
if field_value.ndim < 1: | ||
keyed_inputs.append(field_value) | ||
elif field_value.ndim == 1: | ||
keyed_inputs.append(list(field_value)) | ||
|
||
except UnboundLocalError: | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't look "kosher" -- try to make the code explicit so that no undefined local variables are used. |
||
|
||
for i in range(len(keyed_inputs)): | ||
keyed_inputs[i] = str(keyed_inputs[i]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here, and in Python in general, try to avoid explicit indexing. E.g. here it is just a list comprehension (or even a `map`):
keyed_inputs = [str(x) for x in keyed_inputs]
or
# list() because map in PY3 is a generator
keyed_inputs = list(map(str, keyed_inputs)) |
||
|
||
return keyed_inputs | ||
|
||
|
||
def get_headers_diff(file_headers, headers):
    """Collect per-file values for every header field that differs

    Parameters
    ----------
    file_headers: list of actual headers from files
    headers: list of header fields that differ (may be empty or None)

    Returns
    -------
    dict
        str: list mapping, in the order the fields were given; for each
        differing header field, the list of values per each file.  Fields
        for which no values were produced are left out.
    """
    differences = OrderedDict()

    # nothing was reported as differing -- nothing to look up
    if not headers:
        return differences

    for field_name in headers:
        # values of this field, one entry per file
        per_file_values = diff_header_fields(field_name, file_headers)

        # keep only fields that actually produced values
        if per_file_values:
            differences[field_name] = per_file_values

    return differences
|
||
|
||
def get_data_md5sums(files): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know that you would hate continued review @chrispycheng but, by seeing the function name I got confused why below it returns an empty list if there is only one unique value. So, please
|
||
|
||
md5sums = [ | ||
hashlib.md5(np.ascontiguousarray(nib.load(f).get_data(), dtype=np.float32)).hexdigest() | ||
for f in files | ||
] | ||
|
||
if len(set(md5sums)) == 1: | ||
return [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that might be one contributor to your .000?% coverage miss (do you have codecov extension to the browser installed to see what lines aren't covered?). Apparently there is no test which verifies that you do get empty list in output whenever two files have the same data? you could make a dedicated test for this function and feed it |
||
|
||
return md5sums | ||
|
||
|
||
def main(): | ||
"""Getting the show on the road""" | ||
|
||
parser = get_opt_parser() | ||
(opts, files) = parser.parse_args() | ||
|
||
nibabel.cmdline.utils.verbose_level = opts.verbose | ||
|
||
assert len(files) >= 2, "Please enter at least two files" | ||
|
||
if nibabel.cmdline.utils.verbose_level < 3: | ||
# suppress nibabel format-compliance warnings | ||
nib.imageglobals.logger.level = 50 | ||
|
||
file_headers = [nib.load(f).header for f in files] | ||
|
||
if opts.header_fields: # will almost always have a header field | ||
# signals "all fields" | ||
if opts.header_fields == 'all': | ||
# TODO: header fields might vary across file types, thus prior sensing would be needed | ||
header_fields = file_headers[0].keys() | ||
else: | ||
header_fields = opts.header_fields.split(',') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Apparently there is no test case covering this!
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How do you test an intermediate if/else statement within a function?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Invoke the command/function with options in which you specify the list of fields to be used for comparison.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So that would be a test of the command parser itself? I don't think any other NiBabel function has a test for that; maybe that could be a separate PR? |
||
headers = diff_headers(file_headers, header_fields) | ||
diff = get_headers_diff(file_headers, headers) | ||
data_diff = get_data_md5sums(files) | ||
|
||
if data_diff: | ||
diff['DATA(md5)'] = data_diff | ||
|
||
if diff: | ||
print("These files are different.") | ||
print("{:<11}".format('Field'), end="") | ||
|
||
for f in files: | ||
output = "" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is |
||
i = 0 | ||
while i < len(f): | ||
if f[i] == "/" or f[i] == "\\": | ||
output = "" | ||
else: | ||
output += f[i] | ||
i += 1 | ||
|
||
print("{:<45}".format(output), end="") | ||
|
||
print() | ||
|
||
for key, value in diff.items(): | ||
print("{:<11}".format(key), end="") | ||
|
||
for item in value: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Another idea that might be interesting to consider is whether to lay out the differences vertically, which can scale better with the number of images. Although the horizontal layout looks nice for 2-3 images, and can be the default, for 10 images it might be easier to print each differing field as a separate block of values from all images.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Indeed, but the most frequent case is actually two files with many differing fields. |
||
item_str = str(item) | ||
# Value might start/end with some invisible spacing characters so we | ||
# would "condition" it on both ends a bit | ||
item_str = re.sub('^[ \t]+', '<', item_str) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how is this different from using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Place those indicators only if anything was replaced at any end |
||
item_str = re.sub('[ \t]+$', '>', item_str) | ||
# and also replace some other invisible symbols with a question | ||
# mark | ||
item_str = re.sub('[\x00]', '?', item_str) | ||
print("{:<45}".format(item_str), end="") | ||
|
||
print() | ||
|
||
raise SystemExit(1) | ||
else: | ||
print("These files are identical.") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,13 +5,15 @@ | |
Test running scripts | ||
""" | ||
|
||
from numpy.testing import (assert_almost_equal, | ||
assert_array_equal) | ||
|
||
from nose.tools import (assert_true, assert_false, assert_raises, | ||
assert_equal, assert_not_equal) | ||
from nose.tools import assert_equal | ||
|
||
import nibabel as nib | ||
from nibabel.cmdline.utils import * | ||
from nibabel.cmdline.diff import diff_header_fields, diff_headers | ||
from os.path import (dirname, join as pjoin, abspath) | ||
|
||
|
||
DATA_PATH = abspath(pjoin(dirname(__file__), '../../tests/data')) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is already one defined, just use from nibabel.testing import data_path note that in above '/' within the path might be *nix specific and might (?) not work on Windows. That is why we use all the |
||
|
||
|
||
def test_table2string(): | ||
|
@@ -42,3 +44,26 @@ def get_test(self): | |
|
||
assert_equal(safe_get(test, "test"), 2) | ||
assert_equal(safe_get(test, "failtest"), "-") | ||
|
||
|
||
def test_diff_headers():
    """diff_headers must report exactly the fields that differ between the
    headers of the two sample images."""
    sample_images = ('standard.nii.gz', 'example4d.nii.gz')
    fnames = [pjoin(DATA_PATH, name) for name in sample_images]
    file_headers = [nib.load(fname).header for fname in fnames]

    # every header field offered for comparison
    candidate_fields = [
        'sizeof_hdr', 'data_type', 'db_name', 'extents', 'session_error',
        'regular', 'dim_info', 'dim', 'intent_p1', 'intent_p2', 'intent_p3',
        'intent_code', 'datatype', 'bitpix', 'slice_start', 'pixdim',
        'vox_offset', 'scl_slope', 'scl_inter', 'slice_end', 'slice_code',
        'xyzt_units', 'cal_max', 'cal_min', 'slice_duration', 'toffset',
        'glmax', 'glmin', 'descrip', 'aux_file', 'qform_code', 'sform_code',
        'quatern_b', 'quatern_c', 'quatern_d', 'qoffset_x', 'qoffset_y',
        'qoffset_z', 'srow_x', 'srow_y', 'srow_z', 'intent_name', 'magic',
    ]

    # the subset expected to differ between the two images
    expected_differing = [
        'regular', 'dim_info', 'dim', 'datatype', 'bitpix', 'pixdim',
        'slice_end', 'xyzt_units', 'cal_max', 'descrip', 'qform_code',
        'sform_code', 'quatern_b', 'quatern_c', 'quatern_d', 'qoffset_x',
        'qoffset_y', 'qoffset_z', 'srow_x', 'srow_y', 'srow_z',
    ]

    assert_equal(diff_headers(file_headers, candidate_fields),
                 expected_differing)
|
||
|
||
def test_diff_header_fields():
    """diff_header_fields must return the stringified 'dim_info' value of
    each of the two sample images."""
    sample_images = ('standard.nii.gz', 'example4d.nii.gz')
    fnames = [pjoin(DATA_PATH, name) for name in sample_images]
    file_headers = [nib.load(fname).header for fname in fnames]

    observed = diff_header_fields("dim_info", file_headers)
    assert_equal(observed, ['0', '57'])
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wouldn't hold this PR up for this, but just FYI: `optparse` has been deprecated, and `argparse` is the supported argument parser.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah... we should convert all the cmdline tools which still use optparse in some one PR ;)