Skip to content

Commit 737f99c

Browse files
committed
actually need the report notebook
1 parent 8926514 commit 737f99c

File tree

1 file changed

+333
-0
lines changed

1 file changed

+333
-0
lines changed
Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
---
2+
title: "`r params$disease` `r params$target` score report"
3+
author: Delphi Forecast Team
4+
date: "Rendered: `r format(Sys.time(), '%Y-%m-%d %H:%M:%S')`"
5+
output:
6+
html_document:
7+
code_folding: hide
8+
toc: True
9+
fig_crop: no
10+
# self_contained: False
11+
# lib_dir: libs
12+
params:
13+
disease: "covid"
14+
target: ""
15+
nhsn_archive: ""
16+
scores_nhsn: ""
17+
external_forecasts: ""
18+
---
19+
20+
```{css, echo=FALSE}
21+
img {
22+
max-width:125%;
23+
height:auto;
24+
}
25+
```
26+
27+
```{r setup, include=FALSE}
28+
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
29+
ggplot2::theme_set(ggplot2::theme_bw())
30+
options(width=500)
31+
library(ggnewscale)
32+
#suppressPackageStartupMessages(source("R/load_all.R"))
33+
## external_forecasts <- qs2::qs_read(here::here("covid_hosp_prod", "objects", "external_forecasts"))
34+
## nhsn_archive <- qs2::qs_read(here::here("covid_hosp_prod", "objects", "nhsn_archive_data"))
35+
## scores_nhsn <- qs2::qs_read(here::here("covid_hosp_prod", "objects", "scores_nhsn"))
36+
```
37+
38+
# Recent scores {.tabset}
39+
40+
```{r adjusting data, echo=FALSE, message=FALSE}
41+
forecasts <- external_forecasts %>%
42+
filter(target == "wk inc covid hosp") %>%
43+
select(-target)
44+
our_forecasters <- c("linear", "windowed_seasonal", "windowed_seasonal_nssp", "climate_base", "climate_geo_agged", "climate_linear", "ensemble_windowed", "retro_submission", "CMU-TimeSeries", "seasonal_nssp_latest")
45+
scores_nhsn <- scores_nhsn %>%
46+
mutate(
47+
forecaster =
48+
case_match(
49+
forecaster,
50+
"windowed_seasonal_extra_sources" ~ "windowed_seasonal_nssp",
51+
"ensemble_linclim_windowed_seasonal" ~ "retro_submission",
52+
"ens_ar_only" ~ "ensemble_windowed",
53+
.default = forecaster
54+
)) %>%
55+
left_join(state_census, by = join_by(geo_value == abbr)) %>%
56+
mutate(wis_rate = wis * 1e5 / pop) %>%
57+
mutate(ae_rate = ae_median * 1e5 / pop) %>%
58+
mutate(pop = factor(pop))
59+
Mean <- function(x) mean(x, na.rm = TRUE)
60+
GeoMean <- function(x, offset = 0) exp(Mean(log(x + offset)))
61+
```
62+
```{r score_summary_functions, echo=FALSE, message=FALSE}
63+
scores_by_state <- function(scores) {
64+
scores %>%
65+
group_by(forecaster, geo_value) %>%
66+
summarize(
67+
mean_wis = Mean(wis),
68+
geo_mean_wis = GeoMean(wis),
69+
mean_ae = Mean(ae_median),
70+
geomean_ae = GeoMean(ae_median),
71+
mean_wis_rate = Mean(wis_rate),
72+
geo_mean_wis_rate = GeoMean(wis_rate),
73+
mean_ae_rate = Mean(ae_rate),
74+
geomean_ae_rate = GeoMean(ae_rate),
75+
.groups = "drop"
76+
) %>%
77+
arrange(forecaster, mean_wis_rate)
78+
}
79+
se_mean_states <- function(scores) {
80+
scores_by_state(scores) %>%
81+
group_by(forecaster) %>%
82+
summarize(
83+
se_mean_wis = sd(mean_wis) / sqrt(n()),
84+
se_geomean_wis = sd(geo_mean_wis) / sqrt(n()),
85+
se_mean_ae = sd(mean_ae) / sqrt(n()),
86+
se_geomean_ae = sd(geomean_ae) / sqrt(n()),
87+
se_mean_wis_rate = sd(mean_wis_rate) / sqrt(n()),
88+
se_geomean_wis_rate = sd(geo_mean_wis_rate) / sqrt(n()),
89+
se_mean_ae_rate = sd(mean_ae_rate) / sqrt(n()),
90+
se_geomean_ae_rate = sd(geomean_ae_rate) / sqrt(n()),
91+
.groups = "drop"
92+
) %>%
93+
mutate(across(starts_with("se"), \(x) round(x, 2)))
94+
}
95+
score_summary <- function(scores) {
96+
forecast_date <- scores %>%
97+
distinct(forecast_date) %>%
98+
pull() %>%
99+
max()
100+
scores %>%
101+
group_by(forecaster) %>%
102+
mutate(
103+
min_wis = min(wis[wis > 1e-5]),
104+
min_ae = min(ae_median[ae_median > 1e-5])
105+
) %>%
106+
summarize(
107+
mean_wis = round(Mean(wis), 2),
108+
mean_wis_rate = round(Mean(wis_rate), 2),
109+
geomean_wis = round(GeoMean(wis, min_wis), 2),
110+
mean_ae = round(Mean(ae_median), 2),
111+
mean_ae_rate = round(Mean(ae_rate), 2),
112+
geomean_ae = round(GeoMean(ae_median, min_ae), 2),
113+
mean_cov_50 = round(Mean(interval_coverage_50), 2),
114+
mean_cov_90 = round(Mean(interval_coverage_90), 2),
115+
n = n(),
116+
.groups = "drop"
117+
) %>%
118+
left_join(se_mean_states(scores), by = "forecaster") %>%
119+
arrange(mean_wis) %>%
120+
select(forecaster, mean_wis, se_mean_wis, mean_wis_rate, se_mean_wis_rate, mean_cov_50, mean_cov_90, geomean_wis) %>%
121+
mutate(forecast_date = .env$forecast_date)
122+
}
123+
datatable_function <- function(score_summary) {
124+
datatable(
125+
score_summary,
126+
fillContainer = FALSE,
127+
options = list(
128+
initComplete = htmlwidgets::JS(
129+
"function(settings, json) {",
130+
paste0("$(this.api().table().container()).css({'font-size': '", "10pt", "'});"),
131+
"}"),
132+
pageLength = 25
133+
)
134+
) %>%
135+
formatStyle("forecaster", target = c("cell"),
136+
textDecoration = styleEqual("CMU-TimeSeries", "underline")) %>%
137+
formatStyle(
138+
"mean_wis",
139+
background = styleColorBar(c(0, max(score_summary$mean_wis)), 'lightblue'),
140+
backgroundSize = '98% 88%',
141+
backgroundRepeat = 'no-repeat',
142+
backgroundPosition = 'center'
143+
) %>%
144+
formatStyle(
145+
"mean_wis_rate",
146+
background = styleColorBar(c(0, max(score_summary$mean_wis_rate)), 'lightblue'),
147+
backgroundSize = '98% 88%',
148+
backgroundRepeat = 'no-repeat',
149+
backgroundPosition = 'center'
150+
) %>%
151+
formatStyle(
152+
"geomean_wis",
153+
backgroundound = styleColorBar(c(0, max(score_summary$geomean_wis)), 'lightblue'),
154+
backgroundSize = '98% 88%',
155+
backgroundRepeat = 'no-repeat',
156+
backgroundPosition = 'center'
157+
)
158+
}
159+
```
160+
## Last 8 weeks
161+
162+
```{r datatable_short, out.width= "100%", fig.height = 60, fig.width = 12, echo=FALSE, message=FALSE}
163+
score_summary_short <- scores_nhsn %>%
164+
filter(forecast_date > Sys.Date() - 8 * 7) %>%
165+
score_summary()
166+
datatable_function(score_summary_short)
167+
```
168+
169+
## Last 52 weeks
170+
```{r datatable_long, fig.height = 60, fig.width = 12, echo=FALSE, message=FALSE}
171+
score_summary_long <- scores_nhsn %>%
172+
filter(forecast_date > Sys.Date() - 52 * 7) %>%
173+
score_summary()
174+
datatable_function(score_summary_long)
175+
```
176+
177+
# Moving 8 week average of scores{.tabset}
178+
179+
```{r moving_average, echo=FALSE, message=FALSE, include=FALSE}
180+
ordered_scores <- scores_nhsn %>%
181+
arrange(forecast_date)
182+
sliding_ordered <- ordered_scores %>%
183+
slide_index_dfr(
184+
.i = ordered_scores$forecast_date,
185+
.f = score_summary,
186+
.before = lubridate::weeks(8),
187+
.complete = TRUE
188+
) %>%
189+
group_by(forecaster, forecast_date) %>%
190+
slice(1)
191+
sliding_ordered %>%
192+
ggplot(aes(x=forecast_date, y = mean_wis, color = forecaster)) +
193+
geom_line() +
194+
ylim(0, NA) +
195+
scale_color_viridis_d()
196+
```
197+
198+
## Mean WIS
199+
200+
```{r moving_average_wis, echo=FALSE, message=FALSE, out.width="100%"}
201+
sliding_ordered %>%
202+
ggplot(aes(x=forecast_date, y = mean_wis, color = forecaster)) +
203+
ylim(0, NA) +
204+
geom_line() +
205+
scale_color_viridis_d()
206+
```
207+
208+
## Mean WIS rate
209+
210+
```{r moving_average_wis_rate, echo=FALSE, message=FALSE, out.width="100%"}
211+
sliding_ordered %>%
212+
ggplot(aes(x=forecast_date, y = mean_wis_rate, color = forecaster)) +
213+
ylim(0, NA) +
214+
geom_line() +
215+
scale_color_viridis_d()
216+
```
217+
218+
## Mean Coverage 90
219+
220+
```{r moving_average_cov90, echo=FALSE, message=FALSE, out.width="100%"}
221+
sliding_ordered %>%
222+
ggplot(aes(x=forecast_date, y = mean_cov_90, color = forecaster)) +
223+
ylim(0, 1) +
224+
geom_line() +
225+
scale_color_viridis_d()
226+
```
227+
228+
## Mean Coverage 50
229+
230+
```{r moving_average_cov50, echo=FALSE, message=FALSE, out.width="100%"}
231+
sliding_ordered %>%
232+
ggplot(aes(x=forecast_date, y = mean_cov_50, color = forecaster)) +
233+
ylim(0, 1) +
234+
geom_line() +
235+
scale_color_viridis_d()
236+
```
237+
238+
# Large recent data revisions {.tabset}
239+
240+
```{r revisions, echo=FALSE, message=FALSE, out.width="100%"}
241+
nhsn_recent_archive <- nhsn_archive %>%
242+
filter(Sys.Date() - time_value < 10*7)
243+
nhsn_recent_archive$time_type <- "day"
244+
revision_sum <- nhsn_recent_archive %>%
245+
epiprocess::revision_analysis(value, min_waiting_period = NULL)
246+
av_re_spread <- revision_sum$revision_behavior %>%
247+
group_by(geo_value) %>%
248+
summarize(rel_spread = mean(rel_spread, na.rm = TRUE)) %>%
249+
arrange(desc(rel_spread)) %>%
250+
filter(geo_value %nin% c("vi", "as", "gu"))
251+
worst_geos <- av_re_spread %>% filter((rel_spread > 0.10)) %>% pull(geo_value)
252+
worst_geos <- worst_geos[1:9]
253+
nhsn_filtered <- nhsn_recent_archive %>%
254+
filter(geo_value %in% worst_geos) %>%
255+
filter(time_value >= "2024-11-19")
256+
nhsn_filtered$DT %<>%
257+
mutate(geo_value = factor(geo_value, levels = av_re_spread$geo_value[1:18]))
258+
```
259+
260+
## Large Mean Revision
261+
The states most likely to be subject to total revisions requiring substitution.
262+
263+
```{r revision_plots, fig.width = 15, fig.height = 10, fig.align = "center", echo=FALSE}
264+
autoplot(nhsn_filtered, "value") +
265+
facet_wrap(~geo_value, ncol = 3, scales = "free") + theme(strip.text.x = element_text(size = 8)) +
266+
ylim(0, NA) +
267+
labs(title = "States with the largest mean revision")
268+
```
269+
270+
271+
## All revisions
272+
```{r all_revision_plots, out.width="120%", fig.width = 15, fig.height = 60, fig.align = "center"}
273+
nhsn_recent_archive$DT %<>% mutate(geo_value = factor(geo_value, levels = av_re_spread$geo_value))
274+
nhsn_recent_archive %>%
275+
autoplot("value") + facet_wrap(~geo_value, ncol = 3, scales = "free") + theme(strip.text.x = element_text(size = 8)) +
276+
labs(title = "States with the largest mean revision")
277+
```
278+
279+
# Forecasts from 8 weeks ago, sorted by decreasing recent WIS {.tabset}
280+
```{r plotting_recent_forecasts_function, echo=FALSE, message=FALSE}
281+
plotting_forecasts <- function(plotting_window, score_window, n_plotting) {
282+
geo_score_order <- scores_nhsn %>%
283+
filter(forecast_date > Sys.Date() - score_window * 7) %>%
284+
scores_by_state() %>% filter(forecaster == "CMU-TimeSeries") %>% arrange(desc(mean_wis_rate)) %>% pull(geo_value)
285+
geo_score_order <- geo_score_order[1:n_plotting]
286+
plotting_archive <- nhsn_archive
287+
plotting_archive$DT %<>%
288+
filter(geo_value %in% geo_score_order, time_value > plotting_window) %>%
289+
mutate(geo_value = factor(geo_value, levels = geo_score_order)) %>%
290+
left_join(state_census, by = join_by(geo_value == abbr)) %>%
291+
mutate(value = value * 1e5/ pop) %>%
292+
select(-pop)
293+
latest_data_date <- plotting_archive$DT %>% pull(time_value) %>% max()
294+
forecasts_to_plot <- forecasts %>%
295+
filter(
296+
forecaster == "CMU-TimeSeries",
297+
geo_value %in% geo_score_order,
298+
forecast_date > Sys.Date() - score_window * 7,
299+
forecast_date < latest_data_date - 1 * 7
300+
) %>%
301+
mutate(
302+
geo_value = factor(geo_value, levels = geo_score_order),
303+
forecast_date = factor(forecast_date)
304+
) %>%
305+
ungroup() %>%
306+
left_join(state_census, by = join_by(geo_value == abbr)) %>%
307+
mutate(value = value * 1e5/ pop) %>%
308+
pivot_wider(names_from = quantile, values_from = value) %>%
309+
mutate(time_value = target_end_date)
310+
plotting_archive %>%
311+
autoplot() +
312+
new_scale_color() +
313+
geom_ribbon(data = forecasts_to_plot, aes(ymin = `0.25`, ymax = `0.75`, fill = forecaster, group = forecast_date), alpha = 0.4) +
314+
geom_ribbon(data = forecasts_to_plot, aes(ymin = `0.1`, ymax = `0.9`, fill = forecaster, group = forecast_date), alpha = 0.4) +
315+
geom_ribbon(data = forecasts_to_plot, aes(ymin = `0.05`, ymax = `0.95`, fill = forecaster, group = forecast_date), alpha = 0.4) +
316+
geom_line(data = forecasts_to_plot, aes(y = `0.5`, group = forecast_date), alpha = 0.4) +
317+
scale_color_brewer(palette = "Set3") +
318+
scale_fill_brewer(palette = "Set3") +
319+
facet_grid(factor(geo_value, levels = geo_score_order) ~ forecast_date, scale = if(n_plotting>20) "free" else "fixed") +
320+
labs(title=glue::glue("Worst WIS scoring forecasts over the past {score_window} weeks"))
321+
}
322+
```
323+
324+
## Worst 5
325+
```{r plotting_recent_forecasts, out.width="300%", fig.dim=c(10,5), fig.align = "center", echo=FALSE, message=FALSE}
326+
plotting_forecasts(plotting_window = Sys.Date() - 12 * 7, score_window = 8, n_plotting = 5)
327+
```
328+
329+
## All
330+
```{r plotting_all_recent_forecasts, out.width="300%", fig.dim=c(12,60), fig.align = "center", message=FALSE}
331+
plotting_forecasts(plotting_window = Sys.Date() - 12 * 7, score_window = 8, n_plotting = 60)
332+
```
333+

0 commit comments

Comments
 (0)