Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions integrations/acquisition/covid_hosp/facility/test_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# py3tester coverage target (equivalent to `import *`)
__test_target__ = 'delphi.epidata.acquisition.covid_hosp.facility.update'

NEWLINE="\n"

class AcquisitionTests(unittest.TestCase):

Expand Down Expand Up @@ -54,7 +55,7 @@ def test_acquire_dataset(self):
with self.subTest(name='no data yet'):
response = Epidata.covid_hosp_facility(
'450822', Epidata.range(20200101, 20210101))
self.assertEqual(response['result'], -2)
self.assertEqual(response['result'], -2, response)

# acquire sample data into local database
with self.subTest(name='first acquisition'):
Expand All @@ -63,21 +64,32 @@ def test_acquire_dataset(self):

# make sure the data now exists
with self.subTest(name='initial data checks'):
expected_spotchecks = {
"hospital_pk": "450822",
"collection_week": 20201030,
"publication_date": 20210315,
"previous_day_total_ed_visits_7_day_sum": 536,
"total_personnel_covid_vaccinated_doses_all_7_day_sum": 18,
"total_beds_7_day_avg": 69.3,
"previous_day_admission_influenza_confirmed_7_day_sum": -999999
}
response = Epidata.covid_hosp_facility(
'450822', Epidata.range(20200101, 20210101))
self.assertEqual(response['result'], 1)
self.assertEqual(len(response['epidata']), 1)
row = response['epidata'][0]
self.assertEqual(row['hospital_pk'], '450822')
self.assertEqual(row['collection_week'], 20201030)
self.assertEqual(row['publication_date'], 20210315)
self.assertEqual(row['previous_day_total_ed_visits_7_day_sum'], 536)
self.assertAlmostEqual(row['total_beds_7_day_avg'], 69.3)
self.assertEqual(
row['previous_day_admission_influenza_confirmed_7_day_sum'], -999999)

# expect 94 fields per row (95 database columns, except `id`)
self.assertEqual(len(row), 94)
for k,v in expected_spotchecks.items():
self.assertTrue(
k in row,
f"no '{k}' in row:\n{NEWLINE.join(sorted(row.keys()))}"
)
if isinstance(v, float):
self.assertAlmostEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")
else:
self.assertEqual(row[k], v, f"row[{k}] is {row[k]} not {v}")

# expect 113 fields per row (114 database columns, except `id`)
self.assertEqual(len(row), 113)

# re-acquisition of the same dataset should be a no-op
with self.subTest(name='second acquisition'):
Expand Down Expand Up @@ -108,7 +120,7 @@ def test_facility_lookup(self):
self.assertTrue(acquired)

# texas ground truth, sorted by `hospital_pk`
# see sample data at testdata/acquisition/covid_hosp/facility/dataset.csv
# see sample data at testdata/acquisition/covid_hosp/facility/dataset_old.csv
texas_hospitals = [{
'hospital_pk': '450771',
'state': 'TX',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_acquire_dataset(self):
# make sure the data does not yet exist
with self.subTest(name='no data yet'):
response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))
self.assertEqual(response['result'], -2)
self.assertEqual(response['result'], -2, response)

# acquire sample data into local database
# mock out network calls to external hosts
Expand All @@ -75,13 +75,14 @@ def test_acquire_dataset(self):
self.assertEqual(row['date'], 20201209)
self.assertEqual(row['issue'], 20210315)
self.assertEqual(row['critical_staffing_shortage_today_yes'], 8)
self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
actual = row['inpatient_bed_covid_utilization']
expected = 0.11729857819905214
self.assertAlmostEqual(actual, expected)
self.assertIsNone(row['critical_staffing_shortage_today_no'])

# expect 118 fields per row (120 database columns, except `id` and `record_type`)
self.assertEqual(len(row), 61)
self.assertEqual(len(row), 118)

with self.subTest(name='all date batches acquired'):
response = Epidata.covid_hosp('WY', Epidata.range(20200101, 20210101), issues=20210313)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,14 @@ def test_acquire_dataset(self):
self.assertEqual(row['date'], 20200826)
self.assertEqual(row['issue'], 20210315)
self.assertEqual(row['critical_staffing_shortage_today_yes'], 2)
self.assertEqual(row['total_patients_hospitalized_confirmed_influenza_covid_coverage'], 56)
actual = row['inpatient_bed_covid_utilization']
expected = 0.011946591707659873
self.assertAlmostEqual(actual, expected)
self.assertIsNone(row['critical_staffing_shortage_today_no'])

# expect 118 fields per row (120 database columns, except `id` and `record_type`)
self.assertEqual(len(row), 61)
self.assertEqual(len(row), 118)

# re-acquisition of the same dataset should be a no-op
with self.subTest(name='second acquisition'):
Expand Down
2 changes: 1 addition & 1 deletion integrations/server/test_covid_hosp.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def setUp(self):
def insert_issue(self, cur, issue, value, record_type):
so_many_nulls = ', '.join(['null'] * 57)
cur.execute(f'''insert into covid_hosp_state_timeseries values (
0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, '{record_type}'
0, {issue}, 'PA', 20201118, {value}, {so_many_nulls}, '{record_type}', {so_many_nulls}
)''')

def test_query_by_issue(self):
Expand Down
10 changes: 6 additions & 4 deletions src/acquisition/covid_hosp/common/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,19 @@ def insert_dataset(self, publication_date, dataframe):
dataframe : pandas.DataFrame
The dataset.
"""

num_columns = 2 + len(self.columns_and_types) + len(self.additional_fields)
dataframe_columns_and_types = [
x for x in self.columns_and_types if x[0] in dataframe.columns
]
num_columns = 2 + len(dataframe_columns_and_types) + len(self.additional_fields)
value_placeholders = ', '.join(['%s'] * num_columns)
columns = ', '.join(f'`{i[1]}`' for i in self.columns_and_types + self.additional_fields)
columns = ', '.join(f'`{i[1]}`' for i in dataframe_columns_and_types + self.additional_fields)
sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \
f'VALUES ({value_placeholders})'
id_and_publication_date = (0, publication_date)
with self.new_cursor() as cursor:
for _, row in dataframe.iterrows():
values = []
for name, _, dtype in self.columns_and_types:
for name, _, dtype in dataframe_columns_and_types:
if isinstance(row[name], float) and math.isnan(row[name]):
values.append(None)
else:
Expand Down
Loading