From a33414c07558713e870bb174ec53c7059ed44528 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Sat, 24 Oct 2020 04:26:34 -0700 Subject: [PATCH 1/2] Fix geomapper * change add_population_column signature back to how it was * fix bug with dropna flag in same function being interpreted backwards * get tests to match * change usage of this function in JHU --- _delphi_utils_python/delphi_utils/geomap.py | 12 +++++------- _delphi_utils_python/tests/test_geomap.py | 8 +++----- jhu/delphi_jhu/pull.py | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/_delphi_utils_python/delphi_utils/geomap.py b/_delphi_utils_python/delphi_utils/geomap.py index 65885ffea..056587199 100644 --- a/_delphi_utils_python/delphi_utils/geomap.py +++ b/_delphi_utils_python/delphi_utils/geomap.py @@ -392,17 +392,17 @@ def replace_geocode( df = df.groupby([date_col, new_col]).sum().reset_index() return df - def add_population_column(self, geocode_type, data=None, geocode_col=None, dropna=True): + def add_population_column(self, data, geocode_type, geocode_col=None, dropna=True): """ Appends a population column to a dataframe, based on the FIPS or ZIP code. If no dataframe is provided, the full crosswalk from geocode to population is returned. Parameters --------- - geocode_type: {"fips", "zip"} - The type of the geocode contained in geocode_col. data: pd.DataFrame The dataframe with a FIPS code column. + geocode_type: {"fips", "zip"} + The type of the geocode contained in geocode_col. geocode_col: str, default None The name of the column containing the geocodes. If None, uses the geocode_type as the name. @@ -413,6 +413,7 @@ def add_population_column(self, geocode_type, data=None, geocode_col=None, dropn A dataframe with a population column appended. """ geocode_col = geocode_type if geocode_col is None else geocode_col + data = data.copy() if geocode_type not in ["fips", "zip"]: raise ValueError( @@ -422,13 +423,10 @@ def add_population_column(self, geocode_type, data=None, geocode_col=None, dropn pop_df = self._load_crosswalk(from_code=geocode_type, to_code="pop") - if data is None: - return pop_df.rename(columns={"pop": "population"}) - if not is_string_dtype(data[geocode_col]): data[geocode_col] = data[geocode_col].astype(str).str.zfill(5) - merge_type = "left" if dropna else "inner" + merge_type = "inner" if dropna else "left" data_with_pop = ( data.copy() .merge(pop_df, left_on=geocode_col, right_on=geocode_type, how=merge_type) diff --git a/_delphi_utils_python/tests/test_geomap.py b/_delphi_utils_python/tests/test_geomap.py index b756d1618..fca26b8ea 100644 --- a/_delphi_utils_python/tests/test_geomap.py +++ b/_delphi_utils_python/tests/test_geomap.py @@ -277,14 +277,12 @@ def test_zip_to_state_id(self): def test_add_population_column(self): gmpr = GeoMapper() - new_data = gmpr.add_population_column("fips", self.fips_data_3) + new_data = gmpr.add_population_column(self.fips_data_3, "fips") assert new_data["population"].sum() == 274963 - new_data = gmpr.add_population_column("zip", self.zip_data) + new_data = gmpr.add_population_column(self.zip_data, "zip") assert new_data["population"].sum() == 274902 with pytest.raises(ValueError): - new_data = gmpr.add_population_column("hrr", self.zip_data) - pop_df = gmpr.add_population_column("fips") - assert pop_df.shape == (3274, 2) + new_data = gmpr.add_population_column(self.zip_data, "hrr") def test_add_geocode(self): gmpr = GeoMapper() diff --git a/jhu/delphi_jhu/pull.py b/jhu/delphi_jhu/pull.py index 0330db62b..849c1f98a 100644 --- a/jhu/delphi_jhu/pull.py +++ b/jhu/delphi_jhu/pull.py @@ -101,7 +101,7 @@ def pull_jhu_data(base_url: str, metric: str, gmpr: GeoMapper) -> pd.DataFrame: ) # Merge in population, set population as NAN for fake fips - df = gmpr.add_population_column("fips", df) + df = gmpr.add_population_column(df, "fips") df = create_diffs_column(df) From c1fff63321e1aec58211df78099eee116a613116 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Sat, 24 Oct 2020 04:36:32 -0700 Subject: [PATCH 2/2] Remove an extra data.copy in geomapper --- _delphi_utils_python/delphi_utils/geomap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_delphi_utils_python/delphi_utils/geomap.py b/_delphi_utils_python/delphi_utils/geomap.py index 056587199..b3fe682a1 100644 --- a/_delphi_utils_python/delphi_utils/geomap.py +++ b/_delphi_utils_python/delphi_utils/geomap.py @@ -428,7 +428,7 @@ def add_population_column(self, data, geocode_type, geocode_col=None, dropna=Tru merge_type = "inner" if dropna else "left" data_with_pop = ( - data.copy() + data .merge(pop_df, left_on=geocode_col, right_on=geocode_type, how=merge_type) .rename(columns={"pop": "population"}) )