Skip to content

Commit 175c900

Browse files
committed
refactor: improve a few error messages
1 parent 5de1dae commit 175c900

File tree

2 files changed: 34 additions (+34) and 32 deletions (-32)

2 files changed

+34
-32
lines changed

src/acquisition/covidcast/csv_importer.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,7 @@ def is_header_valid(columns):
237237

238238

239239
@staticmethod
240-
def extract_and_check_row(geo_type: str, table: pd.DataFrame) -> pd.DataFrame:
240+
def extract_and_check_row(geo_type: str, table: pd.DataFrame, details: PathDetails) -> pd.DataFrame:
241241
"""Extract and return `CsvRowValue` from a CSV row, with sanity checks.
242242
243243
Also returns the name of the field which failed sanity check, or None.
@@ -250,19 +250,19 @@ def validate_geo_code(fail_mask: pd.Series, geo_type: str):
250250
validation_fails = table[fail_mask]
251251
if not validation_fails.empty:
252252
first_fail = validation_fails.iloc[0]
253-
raise GeoIdSanityCheckException(f'{geo_type} does not satisfy validation check', geo_id=first_fail["geo_id"])
253+
raise GeoIdSanityCheckException(f'Invalid geo_id for {geo_type}', geo_id=first_fail["geo_id"])
254254

255255
def validate_quantity(column: pd.Series):
256256
"""Validate a column of a table using a validation function."""
257257
infinities = column[column.isin([float('inf'), float('-inf')])]
258258
if not infinities.empty:
259259
first_fail = infinities.iloc[0]
260-
raise ValueSanityCheckException(f'Found infinity in {column.name}: {first_fail}')
260+
raise ValueSanityCheckException(f'Invalid infinite value in {column.name}: {first_fail}', first_fail)
261261

262262
negative_values = column[column.lt(0)]
263263
if not negative_values.empty:
264264
first_fail = negative_values.iloc[0]
265-
raise ValueSanityCheckException(f'Found negative value in {column.name}: {first_fail}')
265+
raise ValueSanityCheckException(f'Invalid negative value in {column.name}: {first_fail}', first_fail)
266266

267267
return column
268268

@@ -283,13 +283,13 @@ def validate_missing_code(missing_code: pd.Series, column: pd.Series):
283283
contradict_mask = missing_code.ne(Nans.NOT_MISSING.value) & column.notna()
284284
if contradict_mask.any():
285285
first_fail = missing_code[contradict_mask].iloc[0]
286-
logger.warning(f'Correcting contradicting missing code: {first_fail}')
286+
logger.warning(f'Correcting contradicting missing code: {first_fail} in {details.source}:{details.signal} {details.time_value} {details.geo_type}')
287287
missing_code[contradict_mask] = Nans.NOT_MISSING.value
288288

289289
contradict_mask = missing_code.eq(Nans.NOT_MISSING.value) & column.isna()
290290
if contradict_mask.any():
291291
first_fail = missing_code[contradict_mask].iloc[0]
292-
logger.warning(f'Correcting contradicting missing code: {first_fail}')
292+
logger.warning(f'Correcting contradicting missing code: {first_fail} in {details.source}:{details.signal} {details.time_value} {details.geo_type}')
293293
missing_code[contradict_mask] = Nans.OTHER.value
294294

295295
return missing_code
@@ -313,7 +313,7 @@ def validate_missing_code(missing_code: pd.Series, column: pd.Series):
313313
elif geo_type == 'nation':
314314
fail_mask = table['geo_id'] != 'us'
315315
else:
316-
raise GeoTypeSanityCheckException(f'Unknown geo_type: {geo_type}')
316+
raise GeoTypeSanityCheckException(f'Invalid geo_type: {geo_type}')
317317

318318
validate_geo_code(fail_mask, geo_type)
319319

@@ -362,7 +362,7 @@ def load_csv(filepath: str, details: PathDetails) -> Optional[List[CovidcastRow]
362362
table[key] = np.nan
363363

364364
try:
365-
table = CsvImporter.extract_and_check_row(details.geo_type, table)
365+
table = CsvImporter.extract_and_check_row(details.geo_type, table, details)
366366
except GeoIdSanityCheckException as err:
367367
row = table.loc[table['geo_id'] == err.geo_id]
368368
logger.warning(event='invalid value for row', detail=(row.to_csv(header=False, index=False, na_rep='NA')), file=filepath)
@@ -371,7 +371,8 @@ def load_csv(filepath: str, details: PathDetails) -> Optional[List[CovidcastRow]
371371
logger.warning(event='invalid value for row', detail=err, file=filepath)
372372
return None
373373
except ValueSanityCheckException as err:
374-
logger.warning(event='invalid value for row', file=filepath)
374+
row = table.loc[table['value'] == err.value]
375+
logger.warning(event='invalid value for row', detail=(row.to_csv(header=False, index=False, na_rep='NA')), file=filepath)
375376
return None
376377
except Exception as err:
377378
logger.warning(event='unknown error occured in extract_and_check_row', detail=err, file=filepath)

tests/acquisition/covidcast/test_csv_importer.py

Lines changed: 24 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -175,60 +175,61 @@ def make_row(
175175
"missing_sample_size": [missing_sample_size]
176176
})
177177
return row
178+
179+
details = PathDetails(20200408, 0, 'src', 'sig', 'day', 20200408, 'state')
178180

179181
# Failure cases.
180182
with pytest.raises(GeoIdSanityCheckException):
181-
CsvImporter.extract_and_check_row('county', make_row(geo_id='1234'))
183+
CsvImporter.extract_and_check_row('county', make_row(geo_id='1234'), details)
182184
with pytest.raises(GeoIdSanityCheckException):
183-
CsvImporter.extract_and_check_row('county', make_row(geo_id='00000'))
185+
CsvImporter.extract_and_check_row('county', make_row(geo_id='00000'), details)
184186
with pytest.raises(GeoIdSanityCheckException):
185-
CsvImporter.extract_and_check_row('hrr', make_row(geo_id='600'))
187+
CsvImporter.extract_and_check_row('hrr', make_row(geo_id='600'), details)
186188
with pytest.raises(GeoIdSanityCheckException):
187-
CsvImporter.extract_and_check_row('msa', make_row(geo_id='1234'))
189+
CsvImporter.extract_and_check_row('msa', make_row(geo_id='1234'), details)
188190
with pytest.raises(GeoIdSanityCheckException):
189-
CsvImporter.extract_and_check_row('msa', make_row(geo_id='01234'))
191+
CsvImporter.extract_and_check_row('msa', make_row(geo_id='01234'), details)
190192
with pytest.raises(GeoIdSanityCheckException):
191-
CsvImporter.extract_and_check_row('dma', make_row(geo_id='400'))
193+
CsvImporter.extract_and_check_row('dma', make_row(geo_id='400'), details)
192194
with pytest.raises(GeoIdSanityCheckException):
193-
CsvImporter.extract_and_check_row('state', make_row(geo_id='48'))
195+
CsvImporter.extract_and_check_row('state', make_row(geo_id='48'), details)
194196
with pytest.raises(GeoIdSanityCheckException):
195-
CsvImporter.extract_and_check_row('state', make_row(geo_id='iowa'))
197+
CsvImporter.extract_and_check_row('state', make_row(geo_id='iowa'), details)
196198
with pytest.raises(GeoIdSanityCheckException):
197-
CsvImporter.extract_and_check_row('nation', make_row(geo_id='0000'))
199+
CsvImporter.extract_and_check_row('nation', make_row(geo_id='0000'), details)
198200
with pytest.raises(GeoIdSanityCheckException):
199-
CsvImporter.extract_and_check_row('hhs', make_row(geo_id='0'))
201+
CsvImporter.extract_and_check_row('hhs', make_row(geo_id='0'), details)
200202
with pytest.raises(GeoIdSanityCheckException):
201-
CsvImporter.extract_and_check_row('county', make_row(geo_id=None))
203+
CsvImporter.extract_and_check_row('county', make_row(geo_id=None), details)
202204

203205
with pytest.raises(Exception):
204-
CsvImporter.extract_and_check_row('hrr', make_row(geo_id='hrr001'))
206+
CsvImporter.extract_and_check_row('hrr', make_row(geo_id='hrr001'), details)
205207

206208
with pytest.raises(GeoTypeSanityCheckException):
207-
CsvImporter.extract_and_check_row('province', make_row(geo_id='ab'))
209+
CsvImporter.extract_and_check_row('province', make_row(geo_id='ab'), details)
208210
with pytest.raises(GeoTypeSanityCheckException):
209-
CsvImporter.extract_and_check_row(None, make_row())
211+
CsvImporter.extract_and_check_row(None, make_row(), details)
210212

211213
with pytest.raises(ValueSanityCheckException):
212-
CsvImporter.extract_and_check_row('state', make_row(stderr=-1))
214+
CsvImporter.extract_and_check_row('state', make_row(stderr=-1), details)
213215
with pytest.raises(ValueSanityCheckException):
214-
CsvImporter.extract_and_check_row('state', make_row(value=float('inf')))
216+
CsvImporter.extract_and_check_row('state', make_row(value=float('inf')), details)
215217
with pytest.raises(ValueSanityCheckException):
216-
CsvImporter.extract_and_check_row('state', make_row(stderr=float('inf')))
218+
CsvImporter.extract_and_check_row('state', make_row(stderr=float('inf')), details)
217219
with pytest.raises(ValueSanityCheckException):
218-
CsvImporter.extract_and_check_row('state', make_row(sample_size=float('inf')))
219-
220+
CsvImporter.extract_and_check_row('state', make_row(sample_size=float('inf')), details)
220221

221222
# Success cases with NANs.
222-
table = CsvImporter.extract_and_check_row('state', make_row())
223+
table = CsvImporter.extract_and_check_row('state', make_row(), details)
223224
assert table.compare(make_row('vi', 1.23, 4.56, 100.5, Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING)).empty
224225

225-
table = CsvImporter.extract_and_check_row('state', make_row(value=None, stderr=np.nan, sample_size=None, missing_value=Nans.DELETED, missing_stderr=Nans.DELETED, missing_sample_size=Nans.DELETED))
226+
table = CsvImporter.extract_and_check_row('state', make_row(value=None, stderr=np.nan, sample_size=None, missing_value=Nans.DELETED, missing_stderr=Nans.DELETED, missing_sample_size=Nans.DELETED), details)
226227
assert table.compare(make_row('vi', None, None, None, Nans.DELETED, Nans.DELETED, Nans.DELETED)).empty
227228

228-
table = CsvImporter.extract_and_check_row('state', make_row(stderr=None, sample_size=np.nan, missing_stderr=Nans.OTHER, missing_sample_size=Nans.OTHER))
229+
table = CsvImporter.extract_and_check_row('state', make_row(stderr=None, sample_size=np.nan, missing_stderr=Nans.OTHER, missing_sample_size=Nans.OTHER), details)
229230
assert table.compare(make_row('vi', 1.23, None, None, Nans.NOT_MISSING, Nans.OTHER, Nans.OTHER)).empty
230231

231-
table = CsvImporter.extract_and_check_row('state', make_row(sample_size=None, missing_value=Nans.NOT_MISSING, missing_stderr=Nans.OTHER, missing_sample_size=Nans.NOT_MISSING))
232+
table = CsvImporter.extract_and_check_row('state', make_row(sample_size=None, missing_value=Nans.NOT_MISSING, missing_stderr=Nans.OTHER, missing_sample_size=Nans.NOT_MISSING), details)
232233
assert table.compare(make_row('vi', 1.23, 4.56, None, Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.OTHER)).empty
233234

234235

0 commit comments

Comments
 (0)