|
| 1 | +--- |
| 2 | +title: "`r params$disease` `r params$target` score report" |
| 3 | +author: Delphi Forecast Team |
| 4 | +date: "Rendered: `r format(Sys.time(), '%Y-%m-%d %H:%M:%S')`" |
| 5 | +output: |
| 6 | + html_document: |
| 7 | + code_folding: hide |
| 8 | + toc: True |
| 9 | + fig_crop: no |
| 10 | + # self_contained: False |
| 11 | + # lib_dir: libs |
| 12 | +params: |
| 13 | + disease: "covid" |
| 14 | + target: "" |
| 15 | + nhsn_archive: "" |
| 16 | + scores_nhsn: "" |
| 17 | + external_forecasts: "" |
| 18 | +--- |
| 19 | + |
| 20 | +```{css, echo=FALSE} |
| 21 | +img { |
| 22 | + max-width:125%; |
| 23 | + height:auto; |
| 24 | +} |
| 25 | +``` |
| 26 | + |
| 27 | +```{r setup, include=FALSE} |
| 28 | +knitr::opts_chunk$set(warning=FALSE, message=FALSE) |
| 29 | +ggplot2::theme_set(ggplot2::theme_bw()) |
| 30 | +options(width=500) |
| 31 | +library(ggnewscale) |
| 32 | +#suppressPackageStartupMessages(source("R/load_all.R")) |
| 33 | +## external_forecasts <- qs2::qs_read(here::here("covid_hosp_prod", "objects", "external_forecasts")) |
| 34 | +## nhsn_archive <- qs2::qs_read(here::here("covid_hosp_prod", "objects", "nhsn_archive_data")) |
| 35 | +## scores_nhsn <- qs2::qs_read(here::here("covid_hosp_prod", "objects", "scores_nhsn")) |
| 36 | +``` |
| 37 | + |
| 38 | +# Recent scores {.tabset} |
| 39 | + |
| 40 | +```{r adjusting data, echo=FALSE, message=FALSE} |
| 41 | +forecasts <- external_forecasts %>% |
| 42 | + filter(target == "wk inc covid hosp") %>% |
| 43 | + select(-target) |
| 44 | +our_forecasters <- c("linear", "windowed_seasonal", "windowed_seasonal_nssp", "climate_base", "climate_geo_agged", "climate_linear", "ensemble_windowed", "retro_submission", "CMU-TimeSeries", "seasonal_nssp_latest") |
| 45 | +scores_nhsn <- scores_nhsn %>% |
| 46 | + mutate( |
| 47 | + forecaster = |
| 48 | + case_match( |
| 49 | + forecaster, |
| 50 | + "windowed_seasonal_extra_sources" ~ "windowed_seasonal_nssp", |
| 51 | + "ensemble_linclim_windowed_seasonal" ~ "retro_submission", |
| 52 | + "ens_ar_only" ~ "ensemble_windowed", |
| 53 | + .default = forecaster |
| 54 | + )) %>% |
| 55 | + left_join(state_census, by = join_by(geo_value == abbr)) %>% |
| 56 | + mutate(wis_rate = wis * 1e5 / pop) %>% |
| 57 | + mutate(ae_rate = ae_median * 1e5 / pop) %>% |
| 58 | + mutate(pop = factor(pop)) |
| 59 | +Mean <- function(x) mean(x, na.rm = TRUE) |
| 60 | +GeoMean <- function(x, offset = 0) exp(Mean(log(x + offset))) |
| 61 | +``` |
| 62 | +```{r score_summary_functions, echo=FALSE, message=FALSE} |
| 63 | +scores_by_state <- function(scores) { |
| 64 | + scores %>% |
| 65 | + group_by(forecaster, geo_value) %>% |
| 66 | + summarize( |
| 67 | + mean_wis = Mean(wis), |
| 68 | + geo_mean_wis = GeoMean(wis), |
| 69 | + mean_ae = Mean(ae_median), |
| 70 | + geomean_ae = GeoMean(ae_median), |
| 71 | + mean_wis_rate = Mean(wis_rate), |
| 72 | + geo_mean_wis_rate = GeoMean(wis_rate), |
| 73 | + mean_ae_rate = Mean(ae_rate), |
| 74 | + geomean_ae_rate = GeoMean(ae_rate), |
| 75 | + .groups = "drop" |
| 76 | + ) %>% |
| 77 | + arrange(forecaster, mean_wis_rate) |
| 78 | +} |
| 79 | +se_mean_states <- function(scores) { |
| 80 | + scores_by_state(scores) %>% |
| 81 | + group_by(forecaster) %>% |
| 82 | + summarize( |
| 83 | + se_mean_wis = sd(mean_wis) / sqrt(n()), |
| 84 | + se_geomean_wis = sd(geo_mean_wis) / sqrt(n()), |
| 85 | + se_mean_ae = sd(mean_ae) / sqrt(n()), |
| 86 | + se_geomean_ae = sd(geomean_ae) / sqrt(n()), |
| 87 | + se_mean_wis_rate = sd(mean_wis_rate) / sqrt(n()), |
| 88 | + se_geomean_wis_rate = sd(geo_mean_wis_rate) / sqrt(n()), |
| 89 | + se_mean_ae_rate = sd(mean_ae_rate) / sqrt(n()), |
| 90 | + se_geomean_ae_rate = sd(geomean_ae_rate) / sqrt(n()), |
| 91 | + .groups = "drop" |
| 92 | + ) %>% |
| 93 | + mutate(across(starts_with("se"), \(x) round(x, 2))) |
| 94 | +} |
| 95 | +score_summary <- function(scores) { |
| 96 | + forecast_date <- scores %>% |
| 97 | + distinct(forecast_date) %>% |
| 98 | + pull() %>% |
| 99 | + max() |
| 100 | + scores %>% |
| 101 | + group_by(forecaster) %>% |
| 102 | + mutate( |
| 103 | + min_wis = min(wis[wis > 1e-5]), |
| 104 | + min_ae = min(ae_median[ae_median > 1e-5]) |
| 105 | + ) %>% |
| 106 | + summarize( |
| 107 | + mean_wis = round(Mean(wis), 2), |
| 108 | + mean_wis_rate = round(Mean(wis_rate), 2), |
| 109 | + geomean_wis = round(GeoMean(wis, min_wis), 2), |
| 110 | + mean_ae = round(Mean(ae_median), 2), |
| 111 | + mean_ae_rate = round(Mean(ae_rate), 2), |
| 112 | + geomean_ae = round(GeoMean(ae_median, min_ae), 2), |
| 113 | + mean_cov_50 = round(Mean(interval_coverage_50), 2), |
| 114 | + mean_cov_90 = round(Mean(interval_coverage_90), 2), |
| 115 | + n = n(), |
| 116 | + .groups = "drop" |
| 117 | + ) %>% |
| 118 | + left_join(se_mean_states(scores), by = "forecaster") %>% |
| 119 | + arrange(mean_wis) %>% |
| 120 | + select(forecaster, mean_wis, se_mean_wis, mean_wis_rate, se_mean_wis_rate, mean_cov_50, mean_cov_90, geomean_wis) %>% |
| 121 | + mutate(forecast_date = .env$forecast_date) |
| 122 | +} |
| 123 | +datatable_function <- function(score_summary) { |
| 124 | + datatable( |
| 125 | + score_summary, |
| 126 | + fillContainer = FALSE, |
| 127 | + options = list( |
| 128 | + initComplete = htmlwidgets::JS( |
| 129 | + "function(settings, json) {", |
| 130 | + paste0("$(this.api().table().container()).css({'font-size': '", "10pt", "'});"), |
| 131 | + "}"), |
| 132 | + pageLength = 25 |
| 133 | + ) |
| 134 | + ) %>% |
| 135 | + formatStyle("forecaster", target = c("cell"), |
| 136 | + textDecoration = styleEqual("CMU-TimeSeries", "underline")) %>% |
| 137 | + formatStyle( |
| 138 | + "mean_wis", |
| 139 | + background = styleColorBar(c(0, max(score_summary$mean_wis)), 'lightblue'), |
| 140 | + backgroundSize = '98% 88%', |
| 141 | + backgroundRepeat = 'no-repeat', |
| 142 | + backgroundPosition = 'center' |
| 143 | + ) %>% |
| 144 | + formatStyle( |
| 145 | + "mean_wis_rate", |
| 146 | + background = styleColorBar(c(0, max(score_summary$mean_wis_rate)), 'lightblue'), |
| 147 | + backgroundSize = '98% 88%', |
| 148 | + backgroundRepeat = 'no-repeat', |
| 149 | + backgroundPosition = 'center' |
| 150 | + ) %>% |
| 151 | + formatStyle( |
| 152 | + "geomean_wis", |
| 153 | + backgroundound = styleColorBar(c(0, max(score_summary$geomean_wis)), 'lightblue'), |
| 154 | + backgroundSize = '98% 88%', |
| 155 | + backgroundRepeat = 'no-repeat', |
| 156 | + backgroundPosition = 'center' |
| 157 | + ) |
| 158 | +} |
| 159 | +``` |
| 160 | +## Last 8 weeks |
| 161 | + |
| 162 | +```{r datatable_short, out.width= "100%", fig.height = 60, fig.width = 12, echo=FALSE, message=FALSE} |
| 163 | +score_summary_short <- scores_nhsn %>% |
| 164 | + filter(forecast_date > Sys.Date() - 8 * 7) %>% |
| 165 | + score_summary() |
| 166 | +datatable_function(score_summary_short) |
| 167 | +``` |
| 168 | + |
| 169 | +## Last 52 weeks |
| 170 | +```{r datatable_long, fig.height = 60, fig.width = 12, echo=FALSE, message=FALSE} |
| 171 | +score_summary_long <- scores_nhsn %>% |
| 172 | + filter(forecast_date > Sys.Date() - 52 * 7) %>% |
| 173 | + score_summary() |
| 174 | +datatable_function(score_summary_long) |
| 175 | +``` |
| 176 | + |
| 177 | +# Moving 8 week average of scores{.tabset} |
| 178 | + |
| 179 | +```{r moving_average, echo=FALSE, message=FALSE, include=FALSE} |
| 180 | +ordered_scores <- scores_nhsn %>% |
| 181 | + arrange(forecast_date) |
| 182 | +sliding_ordered <- ordered_scores %>% |
| 183 | + slide_index_dfr( |
| 184 | + .i = ordered_scores$forecast_date, |
| 185 | + .f = score_summary, |
| 186 | + .before = lubridate::weeks(8), |
| 187 | + .complete = TRUE |
| 188 | + ) %>% |
| 189 | + group_by(forecaster, forecast_date) %>% |
| 190 | + slice(1) |
| 191 | +sliding_ordered %>% |
| 192 | + ggplot(aes(x=forecast_date, y = mean_wis, color = forecaster)) + |
| 193 | + geom_line() + |
| 194 | + ylim(0, NA) + |
| 195 | + scale_color_viridis_d() |
| 196 | +``` |
| 197 | + |
| 198 | +## Mean WIS |
| 199 | + |
| 200 | +```{r moving_average_wis, echo=FALSE, message=FALSE, out.width="100%"} |
| 201 | +sliding_ordered %>% |
| 202 | + ggplot(aes(x=forecast_date, y = mean_wis, color = forecaster)) + |
| 203 | + ylim(0, NA) + |
| 204 | + geom_line() + |
| 205 | + scale_color_viridis_d() |
| 206 | +``` |
| 207 | + |
| 208 | +## Mean WIS rate |
| 209 | + |
| 210 | +```{r moving_average_wis_rate, echo=FALSE, message=FALSE, out.width="100%"} |
| 211 | +sliding_ordered %>% |
| 212 | + ggplot(aes(x=forecast_date, y = mean_wis_rate, color = forecaster)) + |
| 213 | + ylim(0, NA) + |
| 214 | + geom_line() + |
| 215 | + scale_color_viridis_d() |
| 216 | +``` |
| 217 | + |
| 218 | +## Mean Coverage 90 |
| 219 | + |
| 220 | +```{r moving_average_cov90, echo=FALSE, message=FALSE, out.width="100%"} |
| 221 | +sliding_ordered %>% |
| 222 | + ggplot(aes(x=forecast_date, y = mean_cov_90, color = forecaster)) + |
| 223 | + ylim(0, 1) + |
| 224 | + geom_line() + |
| 225 | + scale_color_viridis_d() |
| 226 | +``` |
| 227 | + |
| 228 | +## Mean Coverage 50 |
| 229 | + |
| 230 | +```{r moving_average_cov50, echo=FALSE, message=FALSE, out.width="100%"} |
| 231 | +sliding_ordered %>% |
| 232 | + ggplot(aes(x=forecast_date, y = mean_cov_50, color = forecaster)) + |
| 233 | + ylim(0, 1) + |
| 234 | + geom_line() + |
| 235 | + scale_color_viridis_d() |
| 236 | +``` |
| 237 | + |
| 238 | +# Large recent data revisions {.tabset} |
| 239 | + |
| 240 | +```{r revisions, echo=FALSE, message=FALSE, out.width="100%"} |
| 241 | +nhsn_recent_archive <- nhsn_archive %>% |
| 242 | + filter(Sys.Date() - time_value < 10*7) |
| 243 | +nhsn_recent_archive$time_type <- "day" |
| 244 | +revision_sum <- nhsn_recent_archive %>% |
| 245 | + epiprocess::revision_analysis(value, min_waiting_period = NULL) |
| 246 | +av_re_spread <- revision_sum$revision_behavior %>% |
| 247 | + group_by(geo_value) %>% |
| 248 | + summarize(rel_spread = mean(rel_spread, na.rm = TRUE)) %>% |
| 249 | + arrange(desc(rel_spread)) %>% |
| 250 | + filter(geo_value %nin% c("vi", "as", "gu")) |
| 251 | +worst_geos <- av_re_spread %>% filter((rel_spread > 0.10)) %>% pull(geo_value) |
| 252 | +worst_geos <- worst_geos[1:9] |
| 253 | +nhsn_filtered <- nhsn_recent_archive %>% |
| 254 | + filter(geo_value %in% worst_geos) %>% |
| 255 | + filter(time_value >= "2024-11-19") |
| 256 | +nhsn_filtered$DT %<>% |
| 257 | + mutate(geo_value = factor(geo_value, levels = av_re_spread$geo_value[1:18])) |
| 258 | +``` |
| 259 | + |
| 260 | +## Large Mean Revision |
| 261 | +The states most likely to be subject to total revisions requiring substitution. |
| 262 | + |
| 263 | +```{r revision_plots, fig.width = 15, fig.height = 10, fig.align = "center", echo=FALSE} |
| 264 | +autoplot(nhsn_filtered, "value") + |
| 265 | + facet_wrap(~geo_value, ncol = 3, scales = "free") + theme(strip.text.x = element_text(size = 8)) + |
| 266 | + ylim(0, NA) + |
| 267 | + labs(title = "States with the largest mean revision") |
| 268 | +``` |
| 269 | + |
| 270 | + |
| 271 | +## All revisions |
| 272 | +```{r all_revision_plots, out.width="120%", fig.width = 15, fig.height = 60, fig.align = "center"} |
| 273 | +nhsn_recent_archive$DT %<>% mutate(geo_value = factor(geo_value, levels = av_re_spread$geo_value)) |
| 274 | +nhsn_recent_archive %>% |
| 275 | + autoplot("value") + facet_wrap(~geo_value, ncol = 3, scales = "free") + theme(strip.text.x = element_text(size = 8)) + |
| 276 | + labs(title = "States with the largest mean revision") |
| 277 | +``` |
| 278 | + |
| 279 | +# Forecasts from 8 weeks ago, sorted by decreasing recent WIS {.tabset} |
| 280 | +```{r plotting_recent_forecasts_function, echo=FALSE, message=FALSE} |
| 281 | +plotting_forecasts <- function(plotting_window, score_window, n_plotting) { |
| 282 | + geo_score_order <- scores_nhsn %>% |
| 283 | + filter(forecast_date > Sys.Date() - score_window * 7) %>% |
| 284 | + scores_by_state() %>% filter(forecaster == "CMU-TimeSeries") %>% arrange(desc(mean_wis_rate)) %>% pull(geo_value) |
| 285 | + geo_score_order <- geo_score_order[1:n_plotting] |
| 286 | + plotting_archive <- nhsn_archive |
| 287 | + plotting_archive$DT %<>% |
| 288 | + filter(geo_value %in% geo_score_order, time_value > plotting_window) %>% |
| 289 | + mutate(geo_value = factor(geo_value, levels = geo_score_order)) %>% |
| 290 | + left_join(state_census, by = join_by(geo_value == abbr)) %>% |
| 291 | + mutate(value = value * 1e5/ pop) %>% |
| 292 | + select(-pop) |
| 293 | + latest_data_date <- plotting_archive$DT %>% pull(time_value) %>% max() |
| 294 | + forecasts_to_plot <- forecasts %>% |
| 295 | + filter( |
| 296 | + forecaster == "CMU-TimeSeries", |
| 297 | + geo_value %in% geo_score_order, |
| 298 | + forecast_date > Sys.Date() - score_window * 7, |
| 299 | + forecast_date < latest_data_date - 1 * 7 |
| 300 | + ) %>% |
| 301 | + mutate( |
| 302 | + geo_value = factor(geo_value, levels = geo_score_order), |
| 303 | + forecast_date = factor(forecast_date) |
| 304 | + ) %>% |
| 305 | + ungroup() %>% |
| 306 | + left_join(state_census, by = join_by(geo_value == abbr)) %>% |
| 307 | + mutate(value = value * 1e5/ pop) %>% |
| 308 | + pivot_wider(names_from = quantile, values_from = value) %>% |
| 309 | + mutate(time_value = target_end_date) |
| 310 | + plotting_archive %>% |
| 311 | + autoplot() + |
| 312 | + new_scale_color() + |
| 313 | + geom_ribbon(data = forecasts_to_plot, aes(ymin = `0.25`, ymax = `0.75`, fill = forecaster, group = forecast_date), alpha = 0.4) + |
| 314 | + geom_ribbon(data = forecasts_to_plot, aes(ymin = `0.1`, ymax = `0.9`, fill = forecaster, group = forecast_date), alpha = 0.4) + |
| 315 | + geom_ribbon(data = forecasts_to_plot, aes(ymin = `0.05`, ymax = `0.95`, fill = forecaster, group = forecast_date), alpha = 0.4) + |
| 316 | + geom_line(data = forecasts_to_plot, aes(y = `0.5`, group = forecast_date), alpha = 0.4) + |
| 317 | + scale_color_brewer(palette = "Set3") + |
| 318 | + scale_fill_brewer(palette = "Set3") + |
| 319 | + facet_grid(factor(geo_value, levels = geo_score_order) ~ forecast_date, scale = if(n_plotting>20) "free" else "fixed") + |
| 320 | + labs(title=glue::glue("Worst WIS scoring forecasts over the past {score_window} weeks")) |
| 321 | +} |
| 322 | +``` |
| 323 | + |
| 324 | +## Worst 5 |
| 325 | +```{r plotting_recent_forecasts, out.width="300%", fig.dim=c(10,5), fig.align = "center", echo=FALSE, message=FALSE} |
| 326 | +plotting_forecasts(plotting_window = Sys.Date() - 12 * 7, score_window = 8, n_plotting = 5) |
| 327 | +``` |
| 328 | + |
| 329 | +## All |
| 330 | +```{r plotting_all_recent_forecasts, out.width="300%", fig.dim=c(12,60), fig.align = "center", message=FALSE} |
| 331 | +plotting_forecasts(plotting_window = Sys.Date() - 12 * 7, score_window = 8, n_plotting = 60) |
| 332 | +``` |
| 333 | + |
0 commit comments