Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions usafacts/delphi_usafacts/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ def pull_usafacts_data(base_url: str, metric: str, geo_mapper: GeoMapper) -> pd.
"""
# Read data
df = pd.read_csv(base_url.format(metric=metric)).rename({"countyFIPS":"FIPS"}, axis=1)
# Clean commas in count fields in case the input file included them
df[df.columns[4:]] = df[df.columns[4:]].applymap(
lambda x: int(x.replace(",", "")) if isinstance(x, str) else x)
# Check missing FIPS
null_mask = pd.isnull(df["FIPS"])
assert null_mask.sum() == 0
Expand Down
5 changes: 5 additions & 0 deletions usafacts/tests/test_data/small_deaths_pull.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
countyFIPS,County Name,State,stateFIPS,2/29/20,3/1/20,3/2/20
1,New York City Unallocated/Probable,NY,36,0,0,1
6000,Somewhere,NY,36,11,12,13
2270,Place,NY,36,101,101,"1,0,2"
36009,City,NY,36,2,4,6
19 changes: 13 additions & 6 deletions usafacts/tests/test_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
from os.path import join

import pandas as pd
import numpy as np
from delphi_utils import GeoMapper
from delphi_usafacts.pull import pull_usafacts_data

base_url_good = "test_data/small_{metric}.csv"
base_url_good = "test_data/small_{metric}_pull.csv"

base_url_bad = {
"missing_days": "test_data/bad_{metric}_missing_days.csv",
Expand All @@ -21,11 +22,17 @@ class TestPullUSAFacts:
def test_good_file(self):
    """Check that a well-formed deaths file is pulled into the expected frame."""
    metric = "deaths"
    df = pull_usafacts_data(base_url_good, metric, geo_mapper)

    # The output must expose exactly these columns, in this order.
    expected_columns = [
        "fips", "timestamp", "population", "new_counts", "cumulative_counts"
    ]
    assert (df.columns.values == expected_columns).all()

    # Three consecutive report dates, repeated once per expected FIPS code.
    report_days = [
        pd.Timestamp(day) for day in ("2020-02-29", "2020-03-01", "2020-03-02")
    ]
    expected_df = pd.DataFrame(
        {
            "fips": ["00001"] * 3 + ["36009"] * 3,
            "timestamp": report_days * 2,
            "population": [np.nan] * 3 + [76117.] * 3,
            "new_counts": [0., 0., 1., 2., 2., 2.],
            "cumulative_counts": [0, 0, 1, 2, 4, 6],
        },
        index=[1, 2, 3, 5, 6, 7],
    )

    # Row order is irrelevant, so align both frames on their index first.
    actual_sorted = df.sort_index()
    expected_sorted = expected_df.sort_index()
    pd.testing.assert_frame_equal(actual_sorted, expected_sorted)

def test_missing_days(self):

Expand Down