From 2cfdc5f1db5b510267358fd4352e0696769f8a33 Mon Sep 17 00:00:00 2001
From: undefx
Date: Thu, 30 Apr 2020 10:49:21 -0500
Subject: [PATCH] protect against failed type casts

- strings where numbers are expected shouldn't cause the importer to crash
- catch ValueError and report the field as invalid
- add unit tests which would have caught this failure mode
- unit and integration tests pass
---
 src/acquisition/covidcast/csv_importer.py      | 18 +++++++++++++++---
 .../acquisition/covidcast/test_csv_importer.py |  4 ++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/acquisition/covidcast/csv_importer.py b/src/acquisition/covidcast/csv_importer.py
index e09ee0967..6570dc8ea 100644
--- a/src/acquisition/covidcast/csv_importer.py
+++ b/src/acquisition/covidcast/csv_importer.py
@@ -170,7 +170,11 @@ def extract_and_check_row(row, geo_type):
 
     if geo_type in ('hrr', 'msa', 'dma'):
       # these particular ids are prone to be written as ints -- and floats
-      geo_id = str(CsvImporter.floaty_int(geo_id))
+      try:
+        geo_id = str(CsvImporter.floaty_int(geo_id))
+      except ValueError:
+        # expected a number, but got a string
+        return (None, 'geo_id')
 
     # sanity check geo_id with respect to geo_type
     if geo_type == 'county':
@@ -207,12 +211,20 @@ def extract_and_check_row(row, geo_type):
       return (None, 'val')
 
     # optional nonnegative float
-    stderr = CsvImporter.maybe_apply(float, row.se)
+    try:
+      stderr = CsvImporter.maybe_apply(float, row.se)
+    except ValueError:
+      # expected a number, but got a string
+      return (None, 'se')
     if stderr is not None and stderr < 0:
       return (None, 'se')
 
     # optional not-too-small float
-    sample_size = CsvImporter.maybe_apply(float, row.sample_size)
+    try:
+      sample_size = CsvImporter.maybe_apply(float, row.sample_size)
+    except ValueError:
+      # expected a number, but got a string
+      return (None, 'sample_size')
     if sample_size is not None and sample_size < CsvImporter.MIN_SAMPLE_SIZE:
       return (None, 'sample_size')
 
diff --git a/tests/acquisition/covidcast/test_csv_importer.py b/tests/acquisition/covidcast/test_csv_importer.py
index 20a175cce..f4bb36b4f 100644
--- a/tests/acquisition/covidcast/test_csv_importer.py
+++ b/tests/acquisition/covidcast/test_csv_importer.py
@@ -138,6 +138,10 @@ def make_row(
       (make_row(val=None), 'val'),
       (make_row(val='nan'), 'val'),
       (make_row(val='NaN'), 'val'),
+      (make_row(geo_type='hrr', geo_id='hrr001'), 'geo_id'),
+      (make_row(val='val'), 'val'),
+      (make_row(se='se'), 'se'),
+      (make_row(sample_size='sample_size'), 'sample_size'),
     ]
 
     for ((geo_type, row), field) in failure_cases: