@@ -212,7 +212,7 @@ def validate_quantity(row, attr_quantity):
212212 return "Error"
213213
214214 @staticmethod
215- def validate_missing_code (row , attr_quantity , attr_name ):
215+ def validate_missing_code (row , attr_quantity , attr_name , filepath = None , logger = None ):
216216 """Take a row and validate the missing code associated with
217217 a quantity (e.g., val, se, stderr).
218218
@@ -221,27 +221,32 @@ def validate_missing_code(row, attr_quantity, attr_name):
221221 to infer missing codes except for very simple cases; the default
222222 is to produce an error so that the issue can be fixed in indicators.
223223 """
224- if hasattr (row , "missing_" + attr_name ):
225- missing_entry = getattr (row , "missing_" + attr_name )
224+ if logger is None :
225+ logger = get_structured_logger ('load_csv' )
226+ missing_entry = getattr (row , "missing_" + attr_name , None )
227+
228+ if missing_entry is not None :
226229 try :
227230 missing_entry = int (float (missing_entry )) # convert from string to float to int
228231 except ValueError :
229- return None
230- # A missing code should never contradict the quantity being present,
231- # since that will be filtered in the export_to_csv util in
232- # covidcast-indicators; nonetheless this code is here for safety.
233- if attr_quantity is not None and missing_entry != Nans .NOT_MISSING .value :
234- return None
235- elif attr_quantity is None and missing_entry == Nans .NOT_MISSING .value :
236- return None
237- return missing_entry
238- else :
239- if attr_quantity is None :
240- return Nans .OTHER .value
232+ missing_entry = None
233+
234+ if missing_entry is None and attr_quantity is not None :
241235 return Nans .NOT_MISSING .value
236+ if missing_entry is None and attr_quantity is None :
237+ return Nans .OTHER .value
238+
239+ if missing_entry != Nans .NOT_MISSING .value and attr_quantity is not None :
240+ logger .warning (event = f"missing_{ attr_name } column contradicting { attr_name } presence." , detail = (str (row )), file = filepath )
241+ return Nans .NOT_MISSING .value
242+ if missing_entry == Nans .NOT_MISSING .value and attr_quantity is None :
243+ logger .warning (event = f"missing_{ attr_name } column contradicting { attr_name } presence." , detail = (str (row )), file = filepath )
244+ return Nans .OTHER .value
245+
246+ return missing_entry
242247
243248 @staticmethod
244- def extract_and_check_row (row , geo_type ):
249+ def extract_and_check_row (row , geo_type , filepath = None ):
245250 """Extract and return `RowValues` from a CSV row, with sanity checks.
246251
247252 Also returns the name of the field which failed sanity check, or None.
@@ -316,15 +321,9 @@ def extract_and_check_row(row, geo_type):
316321 return (None , 'sample_size' )
317322
318323 # Validate and write missingness codes
319- missing_value = CsvImporter .validate_missing_code (row , value , "val" )
320- if missing_value is None :
321- return (None , 'missing_val' )
322- missing_stderr = CsvImporter .validate_missing_code (row , stderr , "se" )
323- if missing_stderr is None :
324- return (None , 'missing_se' )
325- missing_sample_size = CsvImporter .validate_missing_code (row , sample_size , "sample_size" )
326- if missing_sample_size is None :
327- return (None , 'missing_sample_size' )
324+ missing_value = CsvImporter .validate_missing_code (row , value , "val" , filepath )
325+ missing_stderr = CsvImporter .validate_missing_code (row , stderr , "se" , filepath )
326+ missing_sample_size = CsvImporter .validate_missing_code (row , sample_size , "sample_size" , filepath )
328327
329328 # return extracted and validated row values
330329 row_values = CsvImporter .RowValues (
@@ -353,7 +352,7 @@ def load_csv(filepath, geo_type, pandas=pandas):
353352 return
354353
355354 for row in table .itertuples (index = False ):
356- row_values , error = CsvImporter .extract_and_check_row (row , geo_type )
355+ row_values , error = CsvImporter .extract_and_check_row (row , geo_type , filepath )
357356 if error :
358357 logger .warning (event = 'invalid value for row' , detail = (str (row ), error ), file = filepath )
359358 yield None
0 commit comments