@@ -16,6 +16,7 @@ g_insufficient_data_geos_nssp <- c(g_insufficient_data_geos, "wy")
1616g_time_value_adjust <- 3
1717g_fetch_args <- epidatr :: fetch_args_list(return_empty = FALSE , timeout_seconds = 400 )
1818g_disease <- " covid"
19+ g_s3_prefix <- " exploration"
1920g_external_object_name <- glue :: glue(" exploration/2024-2025_{g_disease}_hosp_forecasts.parquet" )
2021# date to cut the truth data off at, so we don't have too much of the past
2122g_truth_data_date <- " 2023-09-01"
@@ -153,19 +154,19 @@ parameters_and_date_targets <- rlang::list2(
153154 command = " scripts/reports/ongoing_score_report.Rmd"
154155 ),
155156 tar_file(
156- score_report_rmd ,
157+ name = score_report_rmd ,
157158 command = " scripts/reports/score_report.Rmd"
158159 ),
159160 tar_file(
160- covid_geo_exclusions ,
161+ name = covid_geo_exclusions ,
161162 command = " scripts/covid_geo_exclusions.csv"
162163 ),
163164 tar_file(
164- covid_nssp_geo_exclusions ,
165+ name = covid_nssp_geo_exclusions ,
165166 command = " scripts/covid_nssp_geo_exclusions.csv"
166167 ),
167168 tar_file(
168- covid_data_substitutions ,
169+ name = covid_data_substitutions ,
169170 command = " scripts/covid_data_substitutions.csv"
170171 ),
171172 tar_change(
@@ -176,15 +177,15 @@ parameters_and_date_targets <- rlang::list2(
176177 }
177178 ),
178179 tar_target(
179- nhsn_latest_data ,
180+ name = nhsn_latest_data ,
180181 command = {
181182 nhsn_archive_data %> %
182183 epix_as_of(min(Sys.Date(), nhsn_archive_data $ versions_end )) %> %
183184 filter(geo_value %nin % g_insufficient_data_geos )
184185 }
185186 ),
186187 tar_change(
187- nssp_archive_data ,
188+ name = nssp_archive_data ,
188189 change = max(
189190 get_covidcast_signal_last_update(" nssp" , " pct_ed_visits_covid" , " state" ),
190191 get_socrata_updated_at(" https://data.cdc.gov/api/views/mpgq-jmmr" , lubridate :: now(tz = " UTC" ))
@@ -194,7 +195,7 @@ parameters_and_date_targets <- rlang::list2(
194195 }
195196 ),
196197 tar_target(
197- nssp_latest_data ,
198+ name = nssp_latest_data ,
198199 command = {
199200 nssp_archive_data %> %
200201 epix_as_of(min(Sys.Date(), nssp_archive_data $ versions_end ))
@@ -647,7 +648,64 @@ ensemble_targets <- tar_map(
647648
648649
649650# ================================ SCORE TARGETS ================================
650- score_targets <- list2(
651+ external_forecast_targets <- tar_map(
652+ values = tibble(
653+ forecast_date_int = seq(as.Date(" 2024-11-23" ), round_date(Sys.Date() - 3 , " week" , 6 ), by = " week" )
654+ ) %> %
655+ mutate(
656+ forecast_date_chr = as.character(as.Date(forecast_date_int )),
657+ filename = paste0(g_s3_prefix , " /" , forecast_date_chr , " /" , g_disease , " _forecasts.parquet" ),
658+ ),
659+ names = " forecast_date_chr" ,
660+ tar_change(
661+ name = external_forecasts ,
662+ change = get_s3_object_last_modified(filename , " forecasting-team-data" ),
663+ command = {
664+ get_external_forecasts(filename )
665+ }
666+ ),
667+ tar_target(
668+ name = score_external_nhsn_forecasts ,
669+ command = {
670+ score_forecasts(nhsn_latest_data , external_forecasts , " wk inc covid hosp" )
671+ }
672+ ),
673+ tar_target(
674+ name = score_external_nssp_forecasts ,
675+ command = {
676+ score_forecasts(
677+ nssp_latest_data %> % mutate(value = nssp ),
678+ external_forecasts ,
679+ " wk inc covid prop ed visits" )
680+ }
681+ )
682+ )
683+
684+ combined_targets <- list2(
685+ tar_combine(
686+ name = external_forecasts_full ,
687+ external_forecast_targets [[" external_forecasts" ]],
688+ command = {
689+ dplyr :: bind_rows(!!! .x )
690+ }
691+ ),
692+ tar_combine(
693+ name = external_scores_nhsn_full ,
694+ external_forecast_targets [[" score_external_nhsn_forecasts" ]],
695+ command = {
696+ dplyr :: bind_rows(!!! .x )
697+ }
698+ ),
699+ tar_combine(
700+ name = external_scores_nssp_full ,
701+ external_forecast_targets [[" score_external_nssp_forecasts" ]],
702+ command = {
703+ dplyr :: bind_rows(!!! .x )
704+ }
705+ )
706+ )
707+
708+ list2(
651709 tar_change(
652710 external_forecasts ,
653711 change = get_s3_object_last_modified(g_external_object_name , " forecasting-team-data" ),
@@ -663,7 +721,9 @@ score_targets <- list2(
663721 dplyr :: bind_rows(!!! .x ),
664722 external_forecasts %> %
665723 filter(target == " wk inc covid hosp" ) %> %
666- select(- target )
724+ select(- target ),
725+ min_locations = 52 ,
726+ min_dates = 40
667727 )
668728 }
669729 ),
@@ -675,7 +735,10 @@ score_targets <- list2(
675735 dplyr :: bind_rows(!!! .x ),
676736 external_forecasts %> %
677737 filter(target == " wk inc covid prop ed visits" ) %> %
678- select(- target )
738+ select(- target ) %> %
739+ mutate(value = value * 100 ),
740+ min_locations = 50 ,
741+ min_dates = 14
679742 )
680743 }
681744 ),
@@ -706,27 +769,66 @@ if (g_backtest_mode) {
706769} else {
707770 # Only render the report if there is only one forecast date
708771 # i.e. we're running this in prod on schedule
709- score_notebook <- tar_target(
710- ongoing_score_notebook ,
711- command = {
772+ score_notebook <- list2(
773+ tar_target(
774+ ongoing_nhsn_score_notebook ,
775+ command = {
712776 if (! dir.exists(here :: here(" reports" ))) {
713777 dir.create(here :: here(" reports" ))
714778 }
779+ # Don't run if there aren't forecasts in the past 4 weeks to evaluate
780+ if (external_forecasts_full %> %
781+ filter(
782+ forecast_date > = round_date(Sys.Date() - 3 , " week" , 6 ) - 4 * 7 ,
783+ target == " wk inc covid hosp" ) %> % distinct(forecast_date ) %> % nrow() == 0 ) {
784+ return ()
785+ }
715786 rmarkdown :: render(
716787 ongoing_score_report_rmd ,
717788 output_file = here :: here(
718789 " reports" ,
719- sprintf(" %s_covid_scoring .html" , as.Date(Sys.Date()))
790+ sprintf(" %s_covid_nhsn_scoring .html" , as.Date(Sys.Date()))
720791 ),
721792 params = list (
722793 disease = " covid" ,
723794 target = " nhsn" ,
724- external_forecasts = external_forecasts ,
725- nhsn_archive = nhsn_archive_data ,
726- scores_nhsn = scores_nhsn
795+ external_forecasts = external_forecasts_full % > % filter( target == " wk inc covid hosp " ) % > % select( - target ) ,
796+ archive = nhsn_archive_data ,
797+ scores = external_scores_nhsn_full
727798 )
728799 )
729- }
800+ }
801+ ),
802+ tar_target(
803+ ongoing_nssp_score_notebook ,
804+ command = {
805+ if (! dir.exists(here :: here(" reports" ))) {
806+ dir.create(here :: here(" reports" ))
807+ }
808+ # Don't run if there aren't forecasts in the past 4 weeks to evaluate
809+ if (external_forecasts_full %> %
810+ filter(
811+ forecast_date > = round_date(Sys.Date() - 3 , " week" , 6 ) - 4 * 7 ,
812+ target == " wk inc covid prop ed visits"
813+ ) %> % distinct(forecast_date ) %> % nrow() == 0 ) {
814+ return ()
815+ }
816+ rmarkdown :: render(
817+ ongoing_score_report_rmd ,
818+ output_file = here :: here(
819+ " reports" ,
820+ sprintf(" %s_covid_nssp_scoring.html" , as.Date(Sys.Date()))
821+ ),
822+ params = list (
823+ disease = " covid" ,
824+ target = " nssp" ,
825+ external_forecasts = external_forecasts_full %> % filter(target == " wk inc covid prop ed visits" ) %> % select(- target ),
826+ archive = nssp_archive_data ,
827+ scores = external_scores_nssp_full
828+ )
829+ )
830+ }
831+ ),
730832 )
731833}
732834
@@ -736,6 +838,7 @@ list2(
736838 ensemble_targets ,
737839 combined_nhsn_forecasts ,
738840 combined_nssp_forecasts ,
739- score_targets ,
841+ external_forecast_targets ,
842+ combined_targets ,
740843 score_notebook
741844)
0 commit comments