@@ -14,7 +14,35 @@ knitr::opts_chunk$set(
1414)
1515```
1616
17- # epipredict
17+ ``` {r coloration, include=FALSE, echo=FALSE}
18+ base <- "#002676"
19+ primary <- "#941120"
20+ secondary <- "#f9c80e"
21+ tertiary <- "#177245"
22+ fourth_colour <- "#A393BF"
23+ fifth_colour <- "#2e8edd"
24+ colvec <- c(
25+ base = base, primary = primary, secondary = secondary,
26+ tertiary = tertiary, fourth_colour = fourth_colour,
27+ fifth_colour = fifth_colour
28+ )
29+ library(epiprocess)
30+ suppressMessages(library(tidyverse))
31+ theme_update(legend.position = "bottom", legend.title = element_blank())
32+ delphi_pal <- function(n) {
33+ if (n > 6L) warning("Not enough colors in this palette!")
34+ unname(colvec)[1:n]
35+ }
36+ scale_fill_delphi <- function(..., aesthetics = "fill") {
37+ discrete_scale(aesthetics = aesthetics, palette = delphi_pal, ...)
38+ }
39+ scale_color_delphi <- function(..., aesthetics = "color") {
40+ discrete_scale(aesthetics = aesthetics, palette = delphi_pal, ...)
41+ }
42+ scale_colour_delphi <- scale_color_delphi
43+ ```
44+
45+ # Epipredict
1846
1947<!-- badges: start -->
2048[ ![ R-CMD-check] ( https://github.com/cmu-delphi/epipredict/actions/workflows/R-CMD-check.yaml/badge.svg )] ( https://github.com/cmu-delphi/epipredict/actions/workflows/R-CMD-check.yaml )
@@ -51,7 +79,8 @@ cases <- pub_covidcast(
5179 time_type = "day",
5280 geo_type = "state",
5381 time_values = epirange(20200601, 20220101),
54- geo_values = "*") |>
82+ geo_values = "*"
83+ ) |>
5584 select(geo_value, time_value, case_rate = value)
5685
5786deaths <- pub_covidcast(
@@ -60,7 +89,8 @@ deaths <- pub_covidcast(
6089 time_type = "day",
6190 geo_type = "state",
6291 time_values = epirange(20200601, 20220101),
63- geo_values = "*") |>
92+ geo_values = "*"
93+ ) |>
6494 select(geo_value, time_value, death_rate = value)
6595cases_deaths <-
6696 full_join(cases, deaths, by = c("time_value", "geo_value")) |>
@@ -81,36 +111,39 @@ First, to eliminate some of the noise coming from daily reporting, we do 7 day a
81111
82112[ ^ 1 ] : This makes it so that any given day of the processed timeseries only depends on the previous week, which means that we avoid leaking future values when making a forecast.
83113
84- * Basic. Has data, calls forecaster with default arguments.
85- * Intermediate. Wants to examine changes to the arguments, take advantage of
86- built in flexibility.
87- * Advanced. Wants to write their own forecasters. Maybe willing to build up
88- from some components.
89-
90- The Advanced user should find their task to be relatively easy. Examples of
91- these tasks are illustrated in the [ vignettes and articles] ( https://cmu-delphi.github.io/epipredict ) .
92-
93- See also the (in progress) [ Forecasting Book] ( https://cmu-delphi.github.io/delphi-tooling-book/ ) .
94-
95- ## Intermediate example
96-
97- The package comes with some built-in historical data for illustration, but
98- up-to-date versions of this could be downloaded with the
99- [ ` {epidatr} ` package] ( https://cmu-delphi.github.io/epidatr/ )
100- and processed using
101- [ ` {epiprocess} ` ] ( https://cmu-delphi.github.io/epiprocess/ ) .[ ^ 1 ]
102-
103- [ ^ 1 ] : Other epidemiological signals for non-Covid related illnesses are also
104- available with [ ` {epidatr} ` ] ( https://github.com/cmu-delphi/epidatr ) which
105- interfaces directly to Delphi's
106- [ Epidata API] ( https://cmu-delphi.github.io/delphi-epidata/ )
107-
108- ``` {r epidf, message=FALSE}
109- library(epipredict)
110- covid_case_death_rates
114+ ``` {r smooth}
115+ cases_deaths <-
116+ cases_deaths |>
117+ group_by(geo_value) |>
118+ epi_slide(
119+ cases_7dav = mean(case_rate, na.rm = TRUE),
120+ death_rate_7dav = mean(death_rate, na.rm = TRUE),
121+ .window_size = 7
122+ ) |>
123+ ungroup() |>
124+ mutate(case_rate = NULL, death_rate = NULL) |>
125+ rename(case_rate = cases_7dav, death_rate = death_rate_7dav)
111126```
112127
113- To create and train a simple auto-regressive forecaster to predict the death rate two weeks into the future using past (lagged) deaths and cases, we could use the following function.
128+ Then trimming outliers, most especially negative values:
129+ ``` {r outlier}
130+ cases_deaths <-
131+ cases_deaths |>
132+ group_by(geo_value) |>
133+ mutate(
134+ outlr_death_rate = detect_outlr_rm(time_value, death_rate, detect_negatives = TRUE),
135+ outlr_case_rate = detect_outlr_rm(time_value, case_rate, detect_negatives = TRUE)
136+ ) |>
137+ unnest(cols = starts_with("outlr"), names_sep = "_") |>
138+ ungroup() |>
139+ mutate(
140+ death_rate = outlr_death_rate_replacement,
141+ case_rate = outlr_case_rate_replacement
142+ ) |>
143+ select(geo_value, time_value, case_rate, death_rate)
144+ cases_deaths
145+ ```
146+ </details >
114147
115148After having downloaded and cleaned the data in ` cases_deaths ` , we plot a subset
116149of the states, noting the actual forecast date:
@@ -121,8 +154,8 @@ of the states, noting the actual forecast date:
121154forecast_date_label <-
122155 tibble(
123156 geo_value = rep(plot_locations, 2),
124- source = c(rep("case_rate",4), rep("death_rate", 4)),
125- dates = rep(forecast_date - 7* 2, 2 * length(plot_locations)),
157+ source = c(rep("case_rate", 4), rep("death_rate", 4)),
158+ dates = rep(forecast_date - 7 * 2, 2 * length(plot_locations)),
126159 heights = c(rep(150, 4), rep(1.0, 4))
127160 )
128161processed_data_plot <-
@@ -134,7 +167,8 @@ processed_data_plot <-
134167 facet_grid(source ~ geo_value, scale = "free") +
135168 geom_vline(aes(xintercept = forecast_date)) +
136169 geom_text(
137- data = forecast_date_label, aes(x=dates, label = "forecast\ndate", y = heights), size = 3, hjust = "right") +
170+ data = forecast_date_label, aes(x = dates, label = "forecast\ndate", y = heights), size = 3, hjust = "right"
171+ ) +
138172 scale_x_date(date_breaks = "3 months", date_labels = "%Y %b") +
139173 theme(axis.text.x = element_text(angle = 90, hjust = 1))
140174```
@@ -185,7 +219,8 @@ narrow_data_plot <-
185219 facet_grid(source ~ geo_value, scale = "free") +
186220 geom_vline(aes(xintercept = forecast_date)) +
187221 geom_text(
188- data = forecast_date_label, aes(x=dates, label = "forecast\ndate", y = heights), size = 3, hjust = "right") +
222+ data = forecast_date_label, aes(x = dates, label = "forecast\ndate", y = heights), size = 3, hjust = "right"
223+ ) +
189224 scale_x_date(date_breaks = "3 months", date_labels = "%Y %b") +
190225 theme(axis.text.x = element_text(angle = 90, hjust = 1))
191226```
@@ -203,7 +238,8 @@ forecast_plot <-
203238 epipredict:::plot_bands(
204239 restricted_predictions,
205240 levels = 0.9,
206- fill = primary) +
241+ fill = primary
242+ ) +
207243 geom_point(data = restricted_predictions, aes(y = .data$value), color = secondary)
208244```
209245
0 commit comments