From 1357f24f08079a3850bec9efda306440d35c0288 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 8 Dec 2020 13:57:21 -0800 Subject: [PATCH 1/4] Add hhs to utils --- _delphi_utils_python/delphi_utils/geomap.py | 22 +++++++++------ _delphi_utils_python/tests/test_geomap.py | 31 +++++++++++++++++++++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/geomap.py b/_delphi_utils_python/delphi_utils/geomap.py index d61a2a823..fb53eb443 100644 --- a/_delphi_utils_python/delphi_utils/geomap.py +++ b/_delphi_utils_python/delphi_utils/geomap.py @@ -22,6 +22,7 @@ "msa": join(DATA_PATH, "zip_msa_table.csv"), "pop": join(DATA_PATH, "zip_pop.csv"), "state": join(DATA_PATH, "zip_state_code_table.csv"), + "hhs_region_number": join(DATA_PATH, "zip_hhs_table.csv") }, "fips": { "zip": join(DATA_PATH, "fips_zip_table.csv"), @@ -29,6 +30,7 @@ "msa": join(DATA_PATH, "fips_msa_table.csv"), "pop": join(DATA_PATH, "fips_pop.csv"), "state": join(DATA_PATH, "fips_state_table.csv"), + "hhs_region_number": join(DATA_PATH, "fips_hhs_table.csv"), }, "state": {"state": join(DATA_PATH, "state_codes_table.csv")}, "state_code": { @@ -55,12 +57,14 @@ class GeoMapper: # pylint: disable=too-many-public-methods - [x] zip -> hrr : unweighted - [x] zip -> msa : unweighted - [x] zip -> state + - [x] zip -> hhs_region_number - [x] zip -> population - [x] state code -> hhs_region_number - [x] fips -> state : unweighted - [x] fips -> msa : unweighted - [x] fips -> megacounty - [x] fips -> hrr + - [x] fips -> hhs_region_number - [x] nation - [ ] zip -> dma (postponed) @@ -102,8 +106,10 @@ def __init__(self): """ self.crosswalk_filepaths = CROSSWALK_FILEPATHS self.crosswalks = { - "zip": {"fips": None, "hrr": None, "msa": None, "pop": None, "state": None}, - "fips": {"zip": None, "hrr": None, "msa": None, "pop": None, "state": None}, + "zip": {"fips": None, "hrr": None, "msa": None, + "pop": None, "state": None, "hhs_region_number": None}, + "fips": {"zip": None, "hrr": None, "msa": None, + "pop": None, "state": None, "hhs_region_number": None}, "state": {"state": None}, "state_code": {"hhs_region_number": None}, "jhu_uid": {"fips": None}, @@ -123,6 +129,7 @@ def _load_crosswalk(self, from_code, to_code): ("jhu_uid", "fips"), ("zip", "msa"), ("fips", "hrr"), + ("zip", "hhs_region_number") ]: self.crosswalks[from_code][to_code] = pd.read_csv( stream, @@ -136,6 +143,8 @@ def _load_crosswalk(self, from_code, to_code): elif (from_code, to_code) in [ ("zip", "hrr"), ("fips", "msa"), + ("fips", "hhs_region_number"), + ("state_code", "hhs_region_number") ]: self.crosswalks[from_code][to_code] = pd.read_csv( stream, @@ -151,11 +160,6 @@ def _load_crosswalk(self, from_code, to_code): "state_name": str, }, ) - elif (from_code, to_code) == ("state_code", "hhs_region_number"): - self.crosswalks[from_code][to_code] = pd.read_csv( - stream, - dtype={"state_code": str, "hhs_region_number": str}, - ) elif (from_code, to_code) == ("zip", "state"): self.crosswalks[from_code][to_code] = pd.read_csv( stream, @@ -255,8 +259,8 @@ def add_geocode( """Add a new geocode column to a dataframe. Currently supported conversions: - - fips -> state_code, state_id, state_name, zip, msa, hrr, nation - - zip -> state_code, state_id, state_name, fips, msa, hrr, nation + - fips -> state_code, state_id, state_name, zip, msa, hrr, nation, hhs_region_number + - zip -> state_code, state_id, state_name, fips, msa, hrr, nation, hhs_region_number - jhu_uid -> fips - state_x -> state_y, where x and y are in {code, id, name} - state_code -> hhs_region_number diff --git a/_delphi_utils_python/tests/test_geomap.py b/_delphi_utils_python/tests/test_geomap.py index a0dcbf4bf..6cd79ac2b 100644 --- a/_delphi_utils_python/tests/test_geomap.py +++ b/_delphi_utils_python/tests/test_geomap.py @@ -137,6 +137,9 @@ def test_crosswalks(self): # assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all() cw = gmpr._load_crosswalk(from_code="zip", to_code="state") assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all() + cw = gmpr._load_crosswalk(from_code="zip", to_code="hhs_region_number") + assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all() + def test_load_zip_fips_table(self): gmpr = GeoMapper() @@ -261,3 +264,31 @@ def test_add_geocode(self): } ) ) + + # fips -> hhs + new_data = gmpr.replace_geocode(self.fips_data_3.drop(columns=["date"]), + "fips", "hhs_region_number", date_col=None) + assert new_data.equals( + pd.DataFrame().from_dict( + { + "hhs_region_number": {0: "2", 1: "6"}, + "count": {0: 12, 1: 6}, + "total": {0: 111, 1: 13} + } + ) + ) + + # zip -> hhs + new_data = gmpr.replace_geocode(self.zip_data, "zip", "hhs_region_number") + new_data = new_data.round(10) # get rid of a floating point error with 99.00000000000001 + assert new_data.equals( + pd.DataFrame().from_dict( + { + "date": {0: pd.Timestamp("2018-01-01"), 1: pd.Timestamp("2018-01-01"), + 2: pd.Timestamp("2018-01-03"), 3: pd.Timestamp("2018-01-03")}, + "hhs_region_number": {0: "5", 1: "9", 2: "5", 3: "9"}, + "count": {0: 99.0, 1: 801.0, 2: 100.0, 3: 786.0}, + "total": {0: 198.0, 1: 1602.0, 2: 200.0, 3: 1572.0} + } + ) + ) From d8b6ab61ad5fd5751b733ab116163f0d9fa5c1d1 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 8 Dec 2020 14:16:37 -0800 Subject: [PATCH 2/4] make crosswalk init less verbose --- _delphi_utils_python/delphi_utils/geomap.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/geomap.py b/_delphi_utils_python/delphi_utils/geomap.py index fb53eb443..3dd119df2 100644 --- a/_delphi_utils_python/delphi_utils/geomap.py +++ b/_delphi_utils_python/delphi_utils/geomap.py @@ -106,10 +106,12 @@ def __init__(self): """ self.crosswalk_filepaths = CROSSWALK_FILEPATHS self.crosswalks = { - "zip": {"fips": None, "hrr": None, "msa": None, - "pop": None, "state": None, "hhs_region_number": None}, - "fips": {"zip": None, "hrr": None, "msa": None, - "pop": None, "state": None, "hhs_region_number": None}, + "zip": { + geo: None for geo in ["fips", "hrr", "msa", "pop", "state", "hhs_region_number"] + }, + "fips": { + geo: None for geo in ["zip", "hrr", "msa", "pop", "state", "hhs_region_number"] + }, "state": {"state": None}, "state_code": {"hhs_region_number": None}, "jhu_uid": {"fips": None}, From 90aafdd6913f6c57629f9a9241168b7a31b1e674 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 8 Dec 2020 14:32:28 -0800 Subject: [PATCH 3/4] rename to hhs --- .../data_proc/geomap/geo_data_proc.py | 8 ++--- .../delphi_utils/data/fips_hhs_table.csv | 2 +- ...ber_table.csv => state_code_hhs_table.csv} | 2 +- .../delphi_utils/data/zip_hhs_table.csv | 2 +- _delphi_utils_python/delphi_utils/geomap.py | 36 +++++++++---------- _delphi_utils_python/tests/test_geomap.py | 14 ++++---- 6 files changed, 32 insertions(+), 32 deletions(-) rename _delphi_utils_python/delphi_utils/data/{state_code_hhs_region_number_table.csv => state_code_hhs_table.csv} (91%) diff --git a/_delphi_utils_python/data_proc/geomap/geo_data_proc.py b/_delphi_utils_python/data_proc/geomap/geo_data_proc.py index de95d857d..c2cd08e1e 100644 --- a/_delphi_utils_python/data_proc/geomap/geo_data_proc.py +++ b/_delphi_utils_python/data_proc/geomap/geo_data_proc.py @@ -45,7 +45,7 @@ ZIP_STATE_CODE_OUT_FILENAME = "zip_state_code_table.csv" ZIP_HHS_FILENAME = "zip_hhs_table.csv" STATE_OUT_FILENAME = "state_codes_table.csv" -STATE_HHS_OUT_FILENAME = "state_code_hhs_region_number_table.csv" +STATE_HHS_OUT_FILENAME = "state_code_hhs_table.csv" JHU_FIPS_OUT_FILENAME = "jhu_uid_fips_table.csv" @@ -334,12 +334,12 @@ def create_state_hhs_crosswalk(): hhs_state_pairs.append((9, "Northern Mariana Islands")) # Make dataframe - hhs_df = pd.DataFrame(hhs_state_pairs, columns=["hhs_region_number", "state_name"]) - hhs_df["hhs_region_number"] = hhs_df["hhs_region_number"].astype(str) + hhs_df = pd.DataFrame(hhs_state_pairs, columns=["hhs", "state_name"]) + hhs_df["hhs"] = hhs_df["hhs"].astype(str) ( ss_df.merge(hhs_df, on="state_name", how="left") - .dropna()[["state_code", "hhs_region_number"]] + .dropna()[["state_code", "hhs"]] .to_csv(join(OUTPUT_DIR, STATE_HHS_OUT_FILENAME), index=False) ) diff --git a/_delphi_utils_python/delphi_utils/data/fips_hhs_table.csv b/_delphi_utils_python/delphi_utils/data/fips_hhs_table.csv index f5ceb708a..f5b348c09 100644 --- a/_delphi_utils_python/delphi_utils/data/fips_hhs_table.csv +++ b/_delphi_utils_python/delphi_utils/data/fips_hhs_table.csv @@ -1,4 +1,4 @@ -fips,hhs_region_number +fips,hhs 01000,4 01001,4 01003,4 diff --git a/_delphi_utils_python/delphi_utils/data/state_code_hhs_region_number_table.csv b/_delphi_utils_python/delphi_utils/data/state_code_hhs_table.csv similarity index 91% rename from _delphi_utils_python/delphi_utils/data/state_code_hhs_region_number_table.csv rename to _delphi_utils_python/delphi_utils/data/state_code_hhs_table.csv index 17464ccbb..f8963a838 100644 --- a/_delphi_utils_python/delphi_utils/data/state_code_hhs_region_number_table.csv +++ b/_delphi_utils_python/delphi_utils/data/state_code_hhs_table.csv @@ -1,4 +1,4 @@ -state_code,hhs_region_number +state_code,hhs 01,4 02,10 04,9 diff --git a/_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv b/_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv index a98ef197e..62ff8f137 100644 --- a/_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv +++ b/_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv @@ -1,4 +1,4 @@ -zip,weight,hhs_region_number +zip,weight,hhs 601,0.994345718901454,2 601,0.005654281098546043,2 602,1.0,2 diff --git a/_delphi_utils_python/delphi_utils/geomap.py b/_delphi_utils_python/delphi_utils/geomap.py index 3dd119df2..18c225045 100644 --- a/_delphi_utils_python/delphi_utils/geomap.py +++ b/_delphi_utils_python/delphi_utils/geomap.py @@ -22,7 +22,7 @@ "msa": join(DATA_PATH, "zip_msa_table.csv"), "pop": join(DATA_PATH, "zip_pop.csv"), "state": join(DATA_PATH, "zip_state_code_table.csv"), - "hhs_region_number": join(DATA_PATH, "zip_hhs_table.csv") + "hhs": join(DATA_PATH, "zip_hhs_table.csv") }, "fips": { "zip": join(DATA_PATH, "fips_zip_table.csv"), @@ -30,11 +30,11 @@ "msa": join(DATA_PATH, "fips_msa_table.csv"), "pop": join(DATA_PATH, "fips_pop.csv"), "state": join(DATA_PATH, "fips_state_table.csv"), - "hhs_region_number": join(DATA_PATH, "fips_hhs_table.csv"), + "hhs": join(DATA_PATH, "fips_hhs_table.csv"), }, "state": {"state": join(DATA_PATH, "state_codes_table.csv")}, "state_code": { - "hhs_region_number": join(DATA_PATH, "state_code_hhs_region_number_table.csv") + "hhs": join(DATA_PATH, "state_code_hhs_table.csv") }, "jhu_uid": {"fips": join(DATA_PATH, "jhu_uid_fips_table.csv")}, } @@ -57,14 +57,14 @@ class GeoMapper: # pylint: disable=too-many-public-methods - [x] zip -> hrr : unweighted - [x] zip -> msa : unweighted - [x] zip -> state - - [x] zip -> hhs_region_number + - [x] zip -> hhs - [x] zip -> population - - [x] state code -> hhs_region_number + - [x] state code -> hhs - [x] fips -> state : unweighted - [x] fips -> msa : unweighted - [x] fips -> megacounty - [x] fips -> hrr - - [x] fips -> hhs_region_number + - [x] fips -> hhs - [x] nation - [ ] zip -> dma (postponed) @@ -107,13 +107,13 @@ def __init__(self): self.crosswalk_filepaths = CROSSWALK_FILEPATHS self.crosswalks = { "zip": { - geo: None for geo in ["fips", "hrr", "msa", "pop", "state", "hhs_region_number"] + geo: None for geo in ["fips", "hrr", "msa", "pop", "state", "hhs"] }, "fips": { - geo: None for geo in ["zip", "hrr", "msa", "pop", "state", "hhs_region_number"] + geo: None for geo in ["zip", "hrr", "msa", "pop", "state", "hhs"] }, "state": {"state": None}, - "state_code": {"hhs_region_number": None}, + "state_code": {"hhs": None}, "jhu_uid": {"fips": None}, } @@ -131,7 +131,7 @@ def _load_crosswalk(self, from_code, to_code): ("jhu_uid", "fips"), ("zip", "msa"), ("fips", "hrr"), - ("zip", "hhs_region_number") + ("zip", "hhs") ]: self.crosswalks[from_code][to_code] = pd.read_csv( stream, @@ -145,8 +145,8 @@ def _load_crosswalk(self, from_code, to_code): elif (from_code, to_code) in [ ("zip", "hrr"), ("fips", "msa"), - ("fips", "hhs_region_number"), - ("state_code", "hhs_region_number") + ("fips", "hhs"), + ("state_code", "hhs") ]: self.crosswalks[from_code][to_code] = pd.read_csv( stream, @@ -261,11 +261,11 @@ def add_geocode( """Add a new geocode column to a dataframe. Currently supported conversions: - - fips -> state_code, state_id, state_name, zip, msa, hrr, nation, hhs_region_number - - zip -> state_code, state_id, state_name, fips, msa, hrr, nation, hhs_region_number + - fips -> state_code, state_id, state_name, zip, msa, hrr, nation, hhs + - zip -> state_code, state_id, state_name, fips, msa, hrr, nation, hhs - jhu_uid -> fips - state_x -> state_y, where x and y are in {code, id, name} - - state_code -> hhs_region_number + - state_code -> hhs Parameters --------- @@ -274,7 +274,7 @@ def add_geocode( from_code: {'fips', 'zip', 'jhu_uid', 'state_code', 'state_id', 'state_name'} Specifies the geocode type of the data in from_col. new_code: {'fips', 'zip', 'state_code', 'state_id', 'state_name', 'hrr', 'msa', - 'hhs_region_number'} + 'hhs'} Specifies the geocode type in new_col. from_col: str, default None Name of the column in dataframe containing from_code. If None, then the name @@ -364,7 +364,7 @@ def replace_geocode( - zip -> state_code, state_id, state_name, fips, msa, hrr, nation - jhu_uid -> fips - state_x -> state_y, where x and y are in {code, id, name} - - state_code -> hhs_region_number + - state_code -> hhs Parameters --------- @@ -377,7 +377,7 @@ def replace_geocode( new_col: str Name of the new column to add to data. new_code: {'fips', 'zip', 'state_code', 'state_id', 'state_name', 'hrr', 'msa', - 'hhs_region_number'} + 'hhs'} Specifies the geocode type of the data in new_col. date_col: str or None, default "date" Specify which column contains the date values. Used for value aggregation. diff --git a/_delphi_utils_python/tests/test_geomap.py b/_delphi_utils_python/tests/test_geomap.py index 6cd79ac2b..bf1141eb7 100644 --- a/_delphi_utils_python/tests/test_geomap.py +++ b/_delphi_utils_python/tests/test_geomap.py @@ -137,7 +137,7 @@ def test_crosswalks(self): # assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all() cw = gmpr._load_crosswalk(from_code="zip", to_code="state") assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all() - cw = gmpr._load_crosswalk(from_code="zip", to_code="hhs_region_number") + cw = gmpr._load_crosswalk(from_code="zip", to_code="hhs") assert cw.groupby("zip")["weight"].sum().round(5).eq(1.0).all() @@ -205,8 +205,8 @@ def test_add_geocode(self): # state_code -> hhs new_data = gmpr.add_geocode(self.zip_data, "zip", "state_code") - new_data2 = gmpr.add_geocode(new_data, "state_code", "hhs_region_number") - assert new_data2["hhs_region_number"].unique().size == 2 + new_data2 = gmpr.add_geocode(new_data, "state_code", "hhs") + assert new_data2["hhs"].unique().size == 2 # state_name -> state_id new_data = gmpr.replace_geocode(self.zip_data, "zip", "state_name") @@ -267,11 +267,11 @@ def test_add_geocode(self): # fips -> hhs new_data = gmpr.replace_geocode(self.fips_data_3.drop(columns=["date"]), - "fips", "hhs_region_number", date_col=None) + "fips", "hhs", date_col=None) assert new_data.equals( pd.DataFrame().from_dict( { - "hhs_region_number": {0: "2", 1: "6"}, + "hhs": {0: "2", 1: "6"}, "count": {0: 12, 1: 6}, "total": {0: 111, 1: 13} } @@ -279,14 +279,14 @@ def test_add_geocode(self): ) # zip -> hhs - new_data = gmpr.replace_geocode(self.zip_data, "zip", "hhs_region_number") + new_data = gmpr.replace_geocode(self.zip_data, "zip", "hhs") new_data = new_data.round(10) # get rid of a floating point error with 99.00000000000001 assert new_data.equals( pd.DataFrame().from_dict( { "date": {0: pd.Timestamp("2018-01-01"), 1: pd.Timestamp("2018-01-01"), 2: pd.Timestamp("2018-01-03"), 3: pd.Timestamp("2018-01-03")}, - "hhs_region_number": {0: "5", 1: "9", 2: "5", 3: "9"}, + "hhs": {0: "5", 1: "9", 2: "5", 3: "9"}, "count": {0: 99.0, 1: 801.0, 2: 100.0, 3: 786.0}, "total": {0: 198.0, 1: 1602.0, 2: 200.0, 3: 1572.0} } From 67b45a01b01b3f7f57d7f67c03e1bdb609fda105 Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 9 Dec 2020 01:47:01 -0800 Subject: [PATCH 4/4] rename hhs --- _delphi_utils_python/delphi_utils/data/zip_hhs_table.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv b/_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv index db6bb2b95..b729eef9f 100644 --- a/_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv +++ b/_delphi_utils_python/delphi_utils/data/zip_hhs_table.csv @@ -1,4 +1,4 @@ -zip,weight,hhs_region_number +zip,weight,hhs 00601,0.994345718901454,2 00601,0.005654281098546043,2 00602,1.0,2