diff --git a/usafacts/delphi_usafacts/run.py b/usafacts/delphi_usafacts/run.py index cc3e661c9..fd6e8cb47 100644 --- a/usafacts/delphi_usafacts/run.py +++ b/usafacts/delphi_usafacts/run.py @@ -6,28 +6,22 @@ """ from datetime import datetime, date, time, timedelta from itertools import product -from functools import partial from os.path import join import numpy as np import pandas as pd from delphi_utils import ( - read_params, create_export_csv, + read_params, + GeoMapper, S3ArchiveDiffer, - GeoMapper + Smoother ) from .geo import geo_map from .pull import pull_usafacts_data -from .smooth import ( - identity, - kday_moving_average, -) - # global constants -seven_day_moving_average = partial(kday_moving_average, k=7) METRICS = [ "confirmed", "deaths", @@ -55,9 +49,10 @@ # "incidence": ("incid_prop", False), # "cumulative_prop": ("cumul_prop", False), # } + SMOOTHERS_MAP = { - "unsmoothed": (identity, '', False, lambda d: d - timedelta(days=7)), - "seven_day_average": (seven_day_moving_average, '7dav_', True, lambda d: d), + "unsmoothed": (Smoother("identity"), "", False, lambda d: d - timedelta(days=7)), + "seven_day_average": (Smoother("moving_average", window_length=7), "7dav_", True, lambda d: d), } GEO_RESOLUTIONS = [ "county", @@ -99,7 +94,7 @@ def run_module(): df = dfs[metric] # Aggregate to appropriate geographic resolution df = geo_map(df, geo_res, map_df, sensor) - df["val"] = SMOOTHERS_MAP[smoother][0](df[sensor].values) + df["val"] = SMOOTHERS_MAP[smoother][0].smooth(df[sensor].values) df["se"] = np.nan df["sample_size"] = np.nan # Drop early entries where data insufficient for smoothing diff --git a/usafacts/delphi_usafacts/smooth.py b/usafacts/delphi_usafacts/smooth.py deleted file mode 100644 index 20524b340..000000000 --- a/usafacts/delphi_usafacts/smooth.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Functions for smoothing signals.""" -# -*- coding: utf-8 -*- -"""Smoothing functions.""" -import numpy as np - -def identity(x): - """Trivial "smoother" that does no smoothing. - - Parameters - ---------- - x: np.ndarray - Input array - - Returns - ------- - np.ndarray: - Same as x - """ - return x - -def kday_moving_average(x, k): - """Compute k-day moving average on x. - - Parameters - ---------- - x: np.ndarray - Input array - - Returns - ------- - np.ndarray: - k-day moving average. The first k-1 entries are np.nan. - """ - if not isinstance(k, int): - raise ValueError('k must be int.') - # temp = np.append(np.zeros(k - 1), x) - temp = np.append(np.nan*np.ones(k-1), x) - y = np.convolve(temp, np.ones(k, dtype=int), 'valid') / k - return y diff --git a/usafacts/tests/test_smooth.py b/usafacts/tests/test_smooth.py deleted file mode 100644 index b50089530..000000000 --- a/usafacts/tests/test_smooth.py +++ /dev/null @@ -1,30 +0,0 @@ -import pytest - -from os import listdir -from os.path import join - -import numpy as np -import pandas as pd -from delphi_usafacts.run import run_module - -class TestSmooth: - def test_output_files_smoothed(self, run_as_module): - - dates = [str(x) for x in range(20200304, 20200311)] - - smoothed = pd.read_csv( - join("receiving", - f"{dates[-1]}_state_confirmed_7dav_cumulative_num.csv") - ) - - raw = pd.concat([ - pd.read_csv( - join("receiving", - f"{date}_state_confirmed_cumulative_num.csv") - ) for date in dates - ]) - - raw = raw.groupby('geo_id')['val'].mean() - df = pd.merge(smoothed, raw, on='geo_id', suffixes=('_smoothed', '_raw')) - - assert np.allclose(df['val_smoothed'].values, df['val_raw'].values)