From 00078fe66b41f81770aac1bfe2e85e014b49afe6 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 21 Mar 2023 10:17:04 -0400 Subject: [PATCH 1/5] add params flag to toggle backfill file generation in quidel --- .../quidel_covidtest-params-prod.json.j2 | 1 + .../delphi_quidel_covidtest/run.py | 28 +++++++++++-------- quidel_covidtest/params.json.template | 1 + 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/ansible/templates/quidel_covidtest-params-prod.json.j2 b/ansible/templates/quidel_covidtest-params-prod.json.j2 index c8f7ebf49..c093be3ef 100644 --- a/ansible/templates/quidel_covidtest-params-prod.json.j2 +++ b/ansible/templates/quidel_covidtest-params-prod.json.j2 @@ -10,6 +10,7 @@ "export_end_date": "", "pull_start_date": "2020-05-26", "pull_end_date":"", + "generate_backfill_files": true, "backfill_dir": "/common/backfill/quidel_covidtest", "backfill_merge_day": 0, "export_day_range":40, diff --git a/quidel_covidtest/delphi_quidel_covidtest/run.py b/quidel_covidtest/delphi_quidel_covidtest/run.py index 4f7f7fc8e..b74d617d7 100644 --- a/quidel_covidtest/delphi_quidel_covidtest/run.py +++ b/quidel_covidtest/delphi_quidel_covidtest/run.py @@ -86,8 +86,6 @@ def run_module(params: Dict[str, Any]): stats = [] atexit.register(log_exit, start_time, stats, logger) cache_dir = params["indicator"]["input_cache_dir"] - backfill_dir = params["indicator"]["backfill_dir"] - backfill_merge_day = params["indicator"]["backfill_merge_day"] export_dir = params["common"]["export_dir"] export_start_date = params["indicator"]["export_start_date"] export_end_date = params["indicator"]["export_end_date"] @@ -95,15 +93,23 @@ def run_module(params: Dict[str, Any]): # Pull data and update export date df, _end_date = pull_quidel_covidtest(params["indicator"], logger) - # Merge 4 weeks' data into one file to save runtime - # Notice that here we don't check the _end_date(receive date) - # since we always want such merging 
happens on a certain day of a week - merge_backfill_file(backfill_dir, backfill_merge_day, datetime.today()) - if _end_date is None: - logger.info("The data is up-to-date. Currently, no new data to be ingested.") - return - # Store the backfill intermediate file - store_backfill_file(df, _end_date, backfill_dir) + + # Allow user to turn backfill file generation on or off. Defaults to True + # (generate files). + if params["indicator"].get("generate_backfill_files", True): + backfill_dir = params["indicator"]["backfill_dir"] + backfill_merge_day = params["indicator"]["backfill_merge_day"] + + # Merge 4 weeks' data into one file to save runtime + # Notice that here we don't check the _end_date(receive date) + # since we always want such merging happens on a certain day of a week + merge_backfill_file(backfill_dir, backfill_merge_day, datetime.today()) + if _end_date is None: + logger.info("The data is up-to-date. Currently, no new data to be ingested.") + return + # Store the backfill intermediate file + store_backfill_file(df, _end_date, backfill_dir) + export_end_date = check_export_end_date( export_end_date, _end_date, END_FROM_TODAY_MINUS) export_start_date = check_export_start_date( diff --git a/quidel_covidtest/params.json.template b/quidel_covidtest/params.json.template index d05ecd25e..57574243b 100644 --- a/quidel_covidtest/params.json.template +++ b/quidel_covidtest/params.json.template @@ -7,6 +7,7 @@ "indicator": { "static_file_dir": "./static", "input_cache_dir": "./cache", + "generate_backfill_files": false, "backfill_dir": "./backfill", "backfill_merge_day": 0, "export_start_date": "2020-05-26", From 5e25f4a487db90c016b88e97b6ff00d78431d002 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 21 Mar 2023 10:56:41 -0400 Subject: [PATCH 2/5] add params flag to toggle backfill file generation in changehc --- .../templates/changehc-params-prod.json.j2 | 1 + changehc/delphi_changehc/load_data.py | 30 
+++++++++++-------- changehc/delphi_changehc/run.py | 20 ++++++++----- changehc/params.json.template | 1 + 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/ansible/templates/changehc-params-prod.json.j2 b/ansible/templates/changehc-params-prod.json.j2 index 8c6aeee7e..c365fa605 100644 --- a/ansible/templates/changehc-params-prod.json.j2 +++ b/ansible/templates/changehc-params-prod.json.j2 @@ -16,6 +16,7 @@ "start_date": null, "end_date": null, "drop_date": null, + "generate_backfill_files": true, "backfill_dir": "/common/backfill/chng", "backfill_merge_day": 0, "n_backfill_days": 60, diff --git a/changehc/delphi_changehc/load_data.py b/changehc/delphi_changehc/load_data.py index 3f28f49d1..c22c2483a 100644 --- a/changehc/delphi_changehc/load_data.py +++ b/changehc/delphi_changehc/load_data.py @@ -78,7 +78,8 @@ def load_chng_data(filepath, dropdate, base_geo, def load_combined_data(denom_filepath, covid_filepath, base_geo, - backfill_dir, geo, weekday, numtype, backfill_merge_day): + backfill_dir, geo, weekday, numtype, + generate_backfill_files, backfill_merge_day): """Load in denominator and covid data, and combine them. 
Args: @@ -114,15 +115,17 @@ def load_combined_data(denom_filepath, covid_filepath, base_geo, data = data[["num", "den"]] # Store for backfill - merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day, - issue_date, test_mode=False, check_nd=25) - store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday) + if generate_backfill_files: + merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day, + issue_date, test_mode=False, check_nd=25) + store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday) return data def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepath, covid_like_filepath, base_geo, - backfill_dir, geo, weekday, numtype, backfill_merge_day): + backfill_dir, geo, weekday, numtype, + generate_backfill_files, backfill_merge_day): """Load in denominator and covid-like data, and combine them. Args: @@ -172,14 +175,16 @@ def load_cli_data(denom_filepath, flu_filepath, mixed_filepath, flu_like_filepat data = data[["num", "den"]] # Store for backfill - merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day, - issue_date, test_mode=False, check_nd=25) - store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday) + if generate_backfill_files: + merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day, + issue_date, test_mode=False, check_nd=25) + store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday) return data def load_flu_data(denom_filepath, flu_filepath, base_geo, - backfill_dir, geo, weekday, numtype, backfill_merge_day): + backfill_dir, geo, weekday, numtype, + generate_backfill_files, backfill_merge_day): """Load in denominator and flu data, and combine them. 
Args: @@ -215,7 +220,8 @@ def load_flu_data(denom_filepath, flu_filepath, base_geo, data = data[["num", "den"]] # Store for backfill - merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day, - issue_date, test_mode=False, check_nd=25) - store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday) + if generate_backfill_files: + merge_backfill_file(backfill_dir, numtype, geo, weekday, backfill_merge_day, + issue_date, test_mode=False, check_nd=25) + store_backfill_file(data, issue_date, backfill_dir, numtype, geo, weekday) return data diff --git a/changehc/delphi_changehc/run.py b/changehc/delphi_changehc/run.py index e73cdd29b..f6a0a6bd3 100644 --- a/changehc/delphi_changehc/run.py +++ b/changehc/delphi_changehc/run.py @@ -15,8 +15,7 @@ # first party from .download_ftp_files import download_counts -from .load_data import (load_combined_data, load_cli_data, load_flu_data, - store_backfill_file, merge_backfill_file) +from .load_data import (load_combined_data, load_cli_data, load_flu_data) from .update_sensor import CHCSensorUpdater @@ -134,8 +133,14 @@ def run_module(params: Dict[str, Dict[str, Any]]): # range of estimates to produce n_backfill_days = params["indicator"]["n_backfill_days"] # produce estimates for n_backfill_days n_waiting_days = params["indicator"]["n_waiting_days"] # most recent n_waiting_days won't be est - backfill_dir = params["indicator"]["backfill_dir"] - backfill_merge_day = params["indicator"]["backfill_merge_day"] + + generate_backfill_files = params["indicator"].get("generate_backfill_files", True) + backfill_dir = "" + backfill_merge_day = 0 + if generate_backfill_files: + backfill_dir = params["indicator"]["backfill_dir"] + backfill_merge_day = params["indicator"]["backfill_merge_day"] + enddate_dt = dropdate_dt - timedelta(days=n_waiting_days) startdate_dt = enddate_dt - timedelta(days=n_backfill_days) enddate = str(enddate_dt.date()) @@ -185,15 +190,16 @@ def run_module(params: Dict[str, Dict[str, 
Any]]): data = load_combined_data(file_dict["denom"], file_dict["covid"], "fips", backfill_dir, geo, weekday, numtype, - backfill_merge_day) + generate_backfill_files, backfill_merge_day) elif numtype == "cli": data = load_cli_data(file_dict["denom"],file_dict["flu"],file_dict["mixed"], file_dict["flu_like"],file_dict["covid_like"], "fips", - backfill_dir, geo, weekday, numtype, backfill_merge_day) + backfill_dir, geo, weekday, numtype, + generate_backfill_files, backfill_merge_day) elif numtype == "flu": data = load_flu_data(file_dict["denom"],file_dict["flu"], "fips",backfill_dir, geo, weekday, - numtype, backfill_merge_day) + numtype, generate_backfill_files, backfill_merge_day) more_stats = su_inst.update_sensor( data, params["common"]["export_dir"], diff --git a/changehc/params.json.template b/changehc/params.json.template index 85074cad6..924692781 100644 --- a/changehc/params.json.template +++ b/changehc/params.json.template @@ -17,6 +17,7 @@ "start_date": null, "end_date": null, "drop_date": null, + "generate_backfill_files": false, "backfill_dir": "./backfill", "backfill_merge_day": 0, "n_backfill_days": 60, From d3cb4a9975f18096f9c97700000383bc574b243b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 21 Mar 2023 10:59:31 -0400 Subject: [PATCH 3/5] add params flag to toggle backfill file generation in claims_hosp --- ansible/templates/claims_hosp-params-prod.json.j2 | 1 + claims_hosp/delphi_claims_hosp/run.py | 9 +++++---- claims_hosp/params.json.template | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ansible/templates/claims_hosp-params-prod.json.j2 b/ansible/templates/claims_hosp-params-prod.json.j2 index c707a5e2f..eda818482 100644 --- a/ansible/templates/claims_hosp-params-prod.json.j2 +++ b/ansible/templates/claims_hosp-params-prod.json.j2 @@ -8,6 +8,7 @@ "start_date": "2020-02-01", "end_date": null, "drop_date": null, + "generate_backfill_files": true, "backfill_dir": 
"/common/backfill/claims_hosp", "backfill_merge_day": 0, "n_backfill_days": 70, diff --git a/claims_hosp/delphi_claims_hosp/run.py b/claims_hosp/delphi_claims_hosp/run.py index b1685cb00..53c4cd33b 100644 --- a/claims_hosp/delphi_claims_hosp/run.py +++ b/claims_hosp/delphi_claims_hosp/run.py @@ -91,10 +91,11 @@ def run_module(params): startdate = params["indicator"]['start_date'] # Store backfill data - backfill_dir = params["indicator"]["backfill_dir"] - backfill_merge_day = params["indicator"]["backfill_merge_day"] - merge_backfill_file(backfill_dir, backfill_merge_day, datetime.today()) - store_backfill_file(claims_file, dropdate_dt, backfill_dir) + if params["indicator"].get("generate_backfill_files", True): + backfill_dir = params["indicator"]["backfill_dir"] + backfill_merge_day = params["indicator"]["backfill_merge_day"] + merge_backfill_file(backfill_dir, backfill_merge_day, datetime.today()) + store_backfill_file(claims_file, dropdate_dt, backfill_dir) # print out information logger.info("Loaded params", diff --git a/claims_hosp/params.json.template b/claims_hosp/params.json.template index 67bfd4c43..39e284222 100644 --- a/claims_hosp/params.json.template +++ b/claims_hosp/params.json.template @@ -9,6 +9,7 @@ "end_date": null, "drop_date": null, "n_backfill_days": 70, + "generate_backfill_files": false, "backfill_dir": "./backfill", "backfill_merge_day": 0, "n_waiting_days": 3, From c73c19772de812251ff0d24a8f7500bdb0af2542 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 21 Mar 2023 11:11:09 -0400 Subject: [PATCH 4/5] linting --- changehc/delphi_changehc/run.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/changehc/delphi_changehc/run.py b/changehc/delphi_changehc/run.py index f6a0a6bd3..8d4d25261 100644 --- a/changehc/delphi_changehc/run.py +++ b/changehc/delphi_changehc/run.py @@ -143,14 +143,9 @@ def run_module(params: Dict[str, Dict[str, Any]]): enddate_dt = dropdate_dt - 
timedelta(days=n_waiting_days) startdate_dt = enddate_dt - timedelta(days=n_backfill_days) - enddate = str(enddate_dt.date()) - startdate = str(startdate_dt.date()) - # now allow manual overrides - if params["indicator"]["end_date"] is not None: - enddate = params["indicator"]["end_date"] - if params["indicator"]["start_date"] is not None: - startdate = params["indicator"]["start_date"] + enddate = params["indicator"].get("end_date") or str(enddate_dt.date())  # null => default + startdate = params["indicator"].get("start_date") or str(startdate_dt.date()) logger.info("generating signal and exporting to CSV", first_sensor_date = startdate, From 0361649fb41a03cc0af7b5f9206243dbfa024b03 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 21 Mar 2023 11:14:12 -0400 Subject: [PATCH 5/5] add new arg in tests --- changehc/tests/test_backfill.py | 2 +- changehc/tests/test_load_data.py | 8 ++++---- changehc/tests/test_sensor.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/changehc/tests/test_backfill.py b/changehc/tests/test_backfill.py index e275a16ab..8acedc4c6 100644 --- a/changehc/tests/test_backfill.py +++ b/changehc/tests/test_backfill.py @@ -35,7 +35,7 @@ combined_data = load_combined_data(DENOM_FILEPATH, COVID_FILEPATH, "fips", backfill_dir, geo, weekday, "covid", - backfill_merge_day) + True, backfill_merge_day) class TestBackfill: diff --git a/changehc/tests/test_load_data.py b/changehc/tests/test_load_data.py index 749089583..9ce6f94f8 100644 --- a/changehc/tests/test_load_data.py +++ b/changehc/tests/test_load_data.py @@ -39,9 +39,9 @@ class TestLoadData: Config.COVID_COLS, Config.COVID_DTYPES, Config.COVID_COL) combined_data = load_combined_data(DENOM_FILEPATH, COVID_FILEPATH, "fips", backfill_dir, geo, weekday, "covid", - backfill_merge_day) + True, backfill_merge_day) flu_data = load_flu_data(DENOM_FILEPATH, FLU_FILEPATH, "fips", - backfill_dir, 
geo, weekday, "flu", True, backfill_merge_day) gmpr = GeoMapper() def test_base_unit(self): @@ -55,11 +55,11 @@ def test_base_unit(self): with pytest.raises(AssertionError): load_combined_data(DENOM_FILEPATH, COVID_FILEPATH, "foo", - backfill_dir, geo, weekday, "covid", backfill_merge_day) + backfill_dir, geo, weekday, "covid", True, backfill_merge_day) with pytest.raises(AssertionError): load_flu_data(DENOM_FILEPATH, FLU_FILEPATH, "foo", - backfill_dir, geo, weekday, "covid", backfill_merge_day) + backfill_dir, geo, weekday, "covid", True, backfill_merge_day) def test_denom_columns(self): assert "fips" in self.denom_data.index.names diff --git a/changehc/tests/test_sensor.py b/changehc/tests/test_sensor.py index 05dcc7a25..93e214892 100644 --- a/changehc/tests/test_sensor.py +++ b/changehc/tests/test_sensor.py @@ -32,7 +32,7 @@ class TestLoadData: combined_data = load_combined_data(DENOM_FILEPATH, COVID_FILEPATH, "fips", backfill_dir, geo, weekday, "covid", - backfill_merge_day) + True, backfill_merge_day) def test_backfill(self): num0 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=float).reshape(-1, 1)