@@ -77,6 +77,11 @@ class GeoMapper:
7777 ==========
7878 The main GeoMapper object loads and stores crosswalk dataframes on-demand.
7979
80+ When replacing geocodes with a new one, an aggregation step is performed on the data columns
81+ to merge entries (i.e. in the case of a many-to-one mapping or a weighted mapping). This
82+ requires a specification of the data columns, which are assumed to be all the columns that
83+ are neither the geocodes nor the date column specified in date_col.
84+
8085 Example 1: to add a new column with a new geocode, possibly with weights:
8186 > gmpr = GeoMapper()
8287 > df = gmpr.add_geocode(df, "fips", "zip", from_col="fips", new_col="geo_id",
@@ -305,7 +310,12 @@ def add_geocode(
305310 )
306311
307312 # state codes are all stored in one table
308- if new_code in state_codes :
313+ if from_code in state_codes and new_code in state_codes :
314+ crosswalk = self ._load_crosswalk (from_code = "state" , to_code = "state" )
315+ crosswalk = crosswalk .rename (
316+ columns = {from_code : from_col , new_code : new_col }
317+ )
318+ elif new_code in state_codes :
309319 crosswalk = self ._load_crosswalk (from_code = from_code , to_code = "state" )
310320 crosswalk = crosswalk .rename (
311321 columns = {from_code : from_col , new_code : new_col }
@@ -322,9 +332,13 @@ def add_geocode(
322332 df = df .merge (crosswalk , left_on = from_col , right_on = from_col , how = "left" )
323333
324334 # Drop extra state columns
325- if new_code in state_codes :
335+ if new_code in state_codes and not from_code in state_codes :
326336 state_codes .remove (new_code )
327337 df .drop (columns = state_codes , inplace = True )
338+ elif new_code in state_codes and from_code in state_codes :
339+ state_codes .remove (new_code )
340+ state_codes .remove (from_code )
341+ df .drop (columns = state_codes , inplace = True )
328342
329343 return df
330344
@@ -361,6 +375,9 @@ def replace_geocode(
361375 new_code: {'fips', 'zip', 'state_code', 'state_id', 'state_name', 'hrr', 'msa',
362376 'hhs_region_number'}
363377 Specifies the geocode type of the data in new_col.
378+ date_col: str or None, default "date"
379+ Specify which column contains the date values. Used for value aggregation.
380+ If None, then the aggregation is done only on new_col.
364381 data_cols: list, default None
365382 A list of data column names to aggregate when doing a weighted coding. If set to
366383 None, then all the columns are used except for date_col and new_col.
@@ -389,12 +406,17 @@ def replace_geocode(
389406 # Multiply and aggregate (this automatically zeros NAs)
390407 df [data_cols ] = df [data_cols ].multiply (df ["weight" ], axis = 0 )
391408 df .drop ("weight" , axis = 1 , inplace = True )
392- df = df .groupby ([date_col , new_col ]).sum ().reset_index ()
409+
410+ if not date_col is None :
411+ df = df .groupby ([date_col , new_col ]).sum ().reset_index ()
412+ else :
413+ df = df .groupby ([new_col ]).sum ().reset_index ()
393414 return df
394415
395- def add_population_column (self , data , geocode_type , geocode_col = None ):
416+ def add_population_column (self , data , geocode_type , geocode_col = None , dropna = True ):
396417 """
397- Appends a population column to a dateframe, based on the FIPS or ZIP code.
418+ Appends a population column to a dataframe, based on the FIPS or ZIP code. If no
419+ dataframe is provided, the full crosswalk from geocode to population is returned.
398420
399421 Parameters
400422 ---------
@@ -412,24 +434,26 @@ def add_population_column(self, data, geocode_type, geocode_col=None):
412434 A dataframe with a population column appended.
413435 """
414436 geocode_col = geocode_type if geocode_col is None else geocode_col
437+ data = data .copy ()
415438
416439 if geocode_type not in ["fips" , "zip" ]:
417440 raise ValueError (
418441 "Only fips and zip geocodes supported. \
419442 For other codes, aggregate those."
420443 )
421444
445+ pop_df = self ._load_crosswalk (from_code = geocode_type , to_code = "pop" )
446+
422447 if not is_string_dtype (data [geocode_col ]):
423448 data [geocode_col ] = data [geocode_col ].astype (str ).str .zfill (5 )
424449
425- pop_df = self ._load_crosswalk (from_code = geocode_type , to_code = "pop" )
426-
450+ merge_type = "inner" if dropna else "left"
427451 data_with_pop = (
428- data . copy ()
429- .merge (pop_df , left_on = geocode_col , right_on = geocode_type , how = "inner" )
452+ data
453+ .merge (pop_df , left_on = geocode_col , right_on = geocode_type , how = merge_type )
430454 .rename (columns = {"pop" : "population" })
431455 )
432- data_with_pop [ "population" ] = data_with_pop [ "population" ]. astype ( int )
456+
433457 return data_with_pop
434458
435459 @staticmethod
0 commit comments