From 46ff941619e56c0450074e9b66ba3348c562325e Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 09:29:38 -0800 Subject: [PATCH 1/4] refactor smoother --- usafacts/delphi_usafacts/run.py | 15 +++++------- usafacts/delphi_usafacts/smooth.py | 38 ------------------------------ 2 files changed, 6 insertions(+), 47 deletions(-) delete mode 100644 usafacts/delphi_usafacts/smooth.py diff --git a/usafacts/delphi_usafacts/run.py b/usafacts/delphi_usafacts/run.py index cc3e661c9..0b26542e8 100644 --- a/usafacts/delphi_usafacts/run.py +++ b/usafacts/delphi_usafacts/run.py @@ -15,19 +15,15 @@ read_params, create_export_csv, S3ArchiveDiffer, - GeoMapper + GeoMapper, + Smoother ) from .geo import geo_map from .pull import pull_usafacts_data -from .smooth import ( - identity, - kday_moving_average, -) # global constants -seven_day_moving_average = partial(kday_moving_average, k=7) METRICS = [ "confirmed", "deaths", @@ -55,9 +51,10 @@ # "incidence": ("incid_prop", False), # "cumulative_prop": ("cumul_prop", False), # } + SMOOTHERS_MAP = { - "unsmoothed": (identity, '', False, lambda d: d - timedelta(days=7)), - "seven_day_average": (seven_day_moving_average, '7dav_', True, lambda d: d), + "unsmoothed": (Smoother("identity"), "", False, lambda d: d - timedelta(days=7)), + "seven_day_average": (Smoother("moving_average", window_length=7), "7dav_", True, lambda d: d), } GEO_RESOLUTIONS = [ "county", @@ -99,7 +96,7 @@ def run_module(): df = dfs[metric] # Aggregate to appropriate geographic resolution df = geo_map(df, geo_res, map_df, sensor) - df["val"] = SMOOTHERS_MAP[smoother][0](df[sensor].values) + df["val"] = SMOOTHERS_MAP[smoother][0].smooth(df[sensor].values) df["se"] = np.nan df["sample_size"] = np.nan # Drop early entries where data insufficient for smoothing diff --git a/usafacts/delphi_usafacts/smooth.py b/usafacts/delphi_usafacts/smooth.py deleted file mode 100644 index 86051fdf2..000000000 --- a/usafacts/delphi_usafacts/smooth.py +++ /dev/null @@ -1,38 +0,0 @@ -'''Functions for smoothing signals.''' -# -*- coding: utf-8 -*- -import numpy as np - -def identity(x): - '''Trivial "smoother" that does no smoothing. - - Parameters - ---------- - x: np.ndarray - Input array - - Returns - ------- - np.ndarray: - Same as x - ''' - return x - -def kday_moving_average(x, k): - '''Compute k-day moving average on x. - - Parameters - ---------- - x: np.ndarray - Input array - - Returns - ------- - np.ndarray: - k-day moving average. The first k-1 entries are np.nan. - ''' - if not isinstance(k, int): - raise ValueError('k must be int.') - # temp = np.append(np.zeros(k - 1), x) - temp = np.append(np.nan*np.ones(k-1), x) - y = np.convolve(temp, np.ones(k, dtype=int), 'valid') / k - return y From 344b38e445f6f6f4ce0968ab2660a0c9a261880c Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 09:30:26 -0800 Subject: [PATCH 2/4] Remove old smooth test --- usafacts/tests/test_smooth.py | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 usafacts/tests/test_smooth.py diff --git a/usafacts/tests/test_smooth.py b/usafacts/tests/test_smooth.py deleted file mode 100644 index b50089530..000000000 --- a/usafacts/tests/test_smooth.py +++ /dev/null @@ -1,30 +0,0 @@ -import pytest - -from os import listdir -from os.path import join - -import numpy as np -import pandas as pd -from delphi_usafacts.run import run_module - -class TestSmooth: - def test_output_files_smoothed(self, run_as_module): - - dates = [str(x) for x in range(20200304, 20200311)] - - smoothed = pd.read_csv( - join("receiving", - f"{dates[-1]}_state_confirmed_7dav_cumulative_num.csv") - ) - - raw = pd.concat([ - pd.read_csv( - join("receiving", - f"{date}_state_confirmed_cumulative_num.csv") - ) for date in dates - ]) - - raw = raw.groupby('geo_id')['val'].mean() - df = pd.merge(smoothed, raw, on='geo_id', suffixes=('_smoothed', '_raw')) - - assert np.allclose(df['val_smoothed'].values, df['val_raw'].values) From 2e2b84970fb9e16a36fcec795413564b94ca96fa Mon Sep 17 00:00:00 2001 From: andrew Date: Mon, 9 Nov 2020 07:55:54 -0800 Subject: [PATCH 3/4] remove unused import --- usafacts/delphi_usafacts/run.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/usafacts/delphi_usafacts/run.py b/usafacts/delphi_usafacts/run.py index 0b26542e8..ba72f5c73 100644 --- a/usafacts/delphi_usafacts/run.py +++ b/usafacts/delphi_usafacts/run.py @@ -6,7 +6,6 @@ """ from datetime import datetime, date, time, timedelta from itertools import product -from functools import partial from os.path import join import numpy as np @@ -22,7 +21,6 @@ from .geo import geo_map from .pull import pull_usafacts_data - # global constants METRICS = [ "confirmed", From 983d6910101b9a07bcf0391b965e3bed1bcea325 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 10 Nov 2020 08:16:27 -0800 Subject: [PATCH 4/4] Reorder imports --- usafacts/delphi_usafacts/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usafacts/delphi_usafacts/run.py b/usafacts/delphi_usafacts/run.py index ba72f5c73..fd6e8cb47 100644 --- a/usafacts/delphi_usafacts/run.py +++ b/usafacts/delphi_usafacts/run.py @@ -11,10 +11,10 @@ import numpy as np import pandas as pd from delphi_utils import ( - read_params, create_export_csv, - S3ArchiveDiffer, + read_params, GeoMapper, + S3ArchiveDiffer, Smoother )