Skip to content

Commit 2056e0a

Browse files
committed
dan's simple suggestions
1 parent a958343 commit 2056e0a

File tree

10 files changed

+149
-124
lines changed

10 files changed

+149
-124
lines changed

NEWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
3838
- Replace `dist_quantiles()` with `hardhat::quantile_pred()`
3939
- Allow `quantile()` to threshold to an interval if desired (#434)
4040
- `arx_forecaster()` detects if there's enough data to predict
41-
- Add `plot_data` to `autoplot` so that forecasts can be plotted against the values they're predicting
41+
- Add `observed_response` to `autoplot` so that forecasts can be plotted against the values they're predicting
4242

4343
## Bug fixes
4444

R/autoplot.R

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ ggplot2::autoplot
1616
#' @param object,x An `epi_workflow`
1717
#' @param predictions A data frame with predictions. If `NULL`, only the
1818
#' original data is shown.
19-
#' @param plot_data An epi_df of the data to plot against. This is for the case
19+
#' @param observed_response An epi_df of the data to plot against. This is for the case
2020
#' where you have the actual results to compare the forecast against.
2121
#' @param .levels A numeric vector of levels to plot for any prediction bands.
2222
#' More than 3 levels begins to be difficult to see.
@@ -85,7 +85,7 @@ NULL
8585
autoplot.epi_workflow <- function(
8686
object,
8787
predictions = NULL,
88-
plot_data = NULL,
88+
observed_response = NULL,
8989
.levels = c(.5, .8, .9), ...,
9090
.color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"),
9191
.facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"),
@@ -120,10 +120,10 @@ autoplot.epi_workflow <- function(
120120
} else {
121121
new_name_y <- names(y)
122122
}
123-
if (is.null(plot_data)) {
123+
if (is.null(observed_response)) {
124124
# the outcome has shifted, so we need to shift it forward (or back)
125125
# by the corresponding amount
126-
plot_data <- bind_cols(mold$extras$roles[mold_roles %in% keys], y)
126+
observed_response <- bind_cols(mold$extras$roles[mold_roles %in% keys], y)
127127
if (starts_with_impl("ahead_", names(y))) {
128128
shift <- as.numeric(old_name_y[2])
129129
} else if (starts_with_impl("lag_", names(y))) {
@@ -133,21 +133,22 @@ autoplot.epi_workflow <- function(
133133
new_name_y <- names(y)
134134
shift <- 0
135135
}
136-
plot_data <- rename(plot_data, !!new_name_y := !!names(y))
136+
observed_response <- rename(observed_response, !!new_name_y := !!names(y))
137137
if (!is.null(shift)) {
138-
plot_data <- mutate(plot_data, time_value = time_value + shift)
138+
observed_response <- mutate(observed_response, time_value = time_value + shift)
139139
}
140140
other_keys <- setdiff(key_colnames(object), c("geo_value", "time_value"))
141-
plot_data <- as_epi_df(plot_data,
141+
observed_response <- as_epi_df(observed_response,
142142
as_of = object$fit$meta$as_of,
143143
other_keys = other_keys
144144
)
145145
}
146146
if (is.null(predictions)) {
147147
return(autoplot(
148-
plot_data, new_name_y,
148+
observed_response, new_name_y,
149149
.color_by = .color_by, .facet_by = .facet_by, .base_color = .base_color,
150-
.facet_filter = {{ .facet_filter }}
150+
.facet_filter = {{ .facet_filter }},
151+
.max_facets = .max_facets
151152
))
152153
}
153154

@@ -157,27 +158,29 @@ autoplot.epi_workflow <- function(
157158
}
158159
predictions <- rename(predictions, time_value = target_date)
159160
}
160-
pred_cols_ok <- hardhat::check_column_names(predictions, key_colnames(plot_data))
161+
pred_cols_ok <- hardhat::check_column_names(predictions, key_colnames(observed_response))
161162
if (!pred_cols_ok$ok) {
162163
cli_warn(c(
163164
"`predictions` is missing required variables: {.var {pred_cols_ok$missing_names}}.",
164165
i = "Plotting the original data."
165166
))
166167
return(autoplot(
167-
plot_data, !!new_name_y,
168+
observed_response, !!new_name_y,
168169
.color_by = .color_by, .facet_by = .facet_by, .base_color = .base_color,
169-
.facet_filter = {{ .facet_filter }}
170+
.facet_filter = {{ .facet_filter }},
171+
.max_facets = .max_facets
170172
))
171173
}
172174

173175
# First we plot the history, always faceted by everything
174-
bp <- autoplot(plot_data, !!new_name_y,
176+
bp <- autoplot(observed_response, !!new_name_y,
175177
.color_by = "none", .facet_by = "all_keys",
176-
.base_color = "black", .facet_filter = {{ .facet_filter }}
178+
.base_color = "black", .facet_filter = {{ .facet_filter }},
179+
.max_facets = .max_facets
177180
)
178181

179182
# Now, prepare matching facets in the predictions
180-
ek <- epi_keys_only(plot_data)
183+
ek <- epi_keys_only(observed_response)
181184
predictions <- predictions %>%
182185
mutate(
183186
.facets = interaction(!!!rlang::syms(as.list(ek)), sep = " / "),
@@ -215,7 +218,7 @@ autoplot.epi_workflow <- function(
215218
#' @export
216219
#' @rdname autoplot-epipred
217220
autoplot.canned_epipred <- function(
218-
object, plot_data = NULL, ...,
221+
object, observed_response = NULL, ...,
219222
.color_by = c("all_keys", "geo_value", "other_keys", ".response", "all", "none"),
220223
.facet_by = c(".response", "other_keys", "all_keys", "geo_value", "all", "none"),
221224
.base_color = "dodgerblue4",
@@ -230,9 +233,10 @@ autoplot.canned_epipred <- function(
230233
predictions <- object$predictions %>%
231234
rename(time_value = target_date)
232235

233-
autoplot(ewf, predictions, plot_data, ...,
236+
autoplot(ewf, predictions, observed_response, ...,
234237
.color_by = .color_by, .facet_by = .facet_by,
235-
.base_color = .base_color, .facet_filter = {{ .facet_filter }}
238+
.base_color = .base_color, .facet_filter = {{ .facet_filter }},
239+
.max_facets = .max_facets
236240
)
237241
}
238242

README.Rmd

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ library(ggplot2)
124124
```
125125
</details>
126126

127-
To demonstrate using [`{epipredict}`](https://cmu-delphi.github.io/epipredict/) for forecasting, say we want to
127+
To demonstrate using [`{epipredict}`](https://cmu-delphi.github.io/epipredict/) for forecasting, suppose we want to
128128
predict COVID-19 deaths per 100k people for each of a subset of states:
129129

130130
```{r subset_geos}
@@ -226,8 +226,8 @@ cases_deaths <-
226226
```
227227
</details>
228228

229-
After downloading and cleaning the cases and deaths data, we can plot
230-
a subset of the states, marking the desired forecast date:
229+
After downloading and cleaning the death and case rates (each per 100k people), we can plot
230+
a subset of the states, marking the desired forecast date with a vertical line:
231231

232232
<details>
233233
<summary> Plot </summary>
@@ -287,16 +287,16 @@ four_week_ahead <- arx_forecaster(
287287
four_week_ahead
288288
```
289289

290-
In our model setup, we are defining as predictors case rate lagged 0-3
291-
days, one week, and two weeks, and death rate lagged 0-2 weeks.
290+
In our model setup, we are using as predictors the case rate lagged 0-3
291+
days, one week, and two weeks, and the death rate lagged 0-2 weeks.
292292
The result `four_week_ahead` is both a fitted model object which could be used
293293
any time in the future to create different forecasts, and a set of predicted
294294
values (and prediction intervals) for each location 28 days after the forecast
295295
date.
296296

297297
We can plot the prediction intervals against the true values for our location subset[^2]:
298298

299-
[^2]: Alternatively, you could call `autoplot(four_week_ahead, plot_data =
299+
[^2]: Alternatively, you could call `autoplot(four_week_ahead, observed_response =
300300
cases_deaths)` to get the full collection of forecasts. This is too busy for
301301
the space we have for plotting here.
302302

@@ -310,14 +310,14 @@ restricted_predictions <-
310310
mutate(.response_name = "death_rate")
311311
forecast_plot <-
312312
four_week_ahead |>
313-
autoplot(plot_data = cases_deaths) +
313+
autoplot(observed_response = cases_deaths) +
314314
geom_vline(aes(xintercept = forecast_date)) +
315315
geom_text(
316316
data = forecast_date_label %>% filter(.response_name == "death_rate"),
317317
aes(x = dates, label = "forecast\ndate", y = heights),
318318
size = 3, hjust = "right"
319319
) +
320-
scale_x_date(date_breaks = "3 months", date_labels = "%Y %b") +
320+
scale_x_date(date_breaks = "3 months", date_labels = "%y %b") +
321321
theme(axis.text.x = element_text(angle = 90, hjust = 1))
322322
```
323323
</details>

README.md

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ library(ggplot2)
6262

6363
To demonstrate using
6464
[`{epipredict}`](https://cmu-delphi.github.io/epipredict/) for
65-
forecasting, say we want to predict COVID-19 deaths per 100k people for
66-
each of a subset of states
65+
forecasting, suppose we want to predict COVID-19 deaths per 100k people
66+
for each of a subset of states:
6767

6868
``` r
6969
used_locations <- c("ca", "ma", "ny", "tx")
@@ -173,8 +173,9 @@ cases_deaths <-
173173

174174
</details>
175175

176-
After downloading and cleaning the cases and deaths data, we can plot a
177-
subset of the states, marking the desired forecast date:
176+
After downloading and cleaning the death and case rates (each per 100k
177+
people), we can plot a subset of the states, marking the desired
178+
forecast date with a vertical line:
178179

179180
<details>
180181
<summary>
@@ -234,7 +235,7 @@ four_week_ahead <- arx_forecaster(
234235
four_week_ahead
235236
#> ══ A basic forecaster of type ARX Forecaster ════════════════════════════════
236237
#>
237-
#> This forecaster was fit on 2025-03-03 14:43:07.
238+
#> This forecaster was fit on 2025-04-09 17:23:00.
238239
#>
239240
#> Training data was an <epi_df> with:
240241
#> • Geography: state,
@@ -251,8 +252,8 @@ four_week_ahead
251252
#>
252253
```
253254

254-
In our model setup, we are defining as predictors case rate lagged 0-3
255-
days, one week, and two weeks, and death rate lagged 0-2 weeks. The
255+
In our model setup, we are using as predictors the case rate lagged 0-3
256+
days, one week, and two weeks, and the death rate lagged 0-2 weeks. The
256257
result `four_week_ahead` is both a fitted model object which could be
257258
used any time in the future to create different forecasts, and a set of
258259
predicted values (and prediction intervals) for each location 28 days
@@ -274,14 +275,14 @@ restricted_predictions <-
274275
mutate(.response_name = "death_rate")
275276
forecast_plot <-
276277
four_week_ahead |>
277-
autoplot(plot_data = cases_deaths) +
278+
autoplot(observed_response = cases_deaths) +
278279
geom_vline(aes(xintercept = forecast_date)) +
279280
geom_text(
280281
data = forecast_date_label %>% filter(.response_name == "death_rate"),
281282
aes(x = dates, label = "forecast\ndate", y = heights),
282283
size = 3, hjust = "right"
283284
) +
284-
scale_x_date(date_breaks = "3 months", date_labels = "%Y %b") +
285+
scale_x_date(date_breaks = "3 months", date_labels = "%y %b") +
285286
theme(axis.text.x = element_text(angle = 90, hjust = 1))
286287
```
287288

@@ -337,9 +338,9 @@ email, or the InsightNet Slack.
337338
ago.
338339

339340
[^2]: Alternatively, you could call
340-
`autoplot(four_week_ahead, plot_data = cases_deaths)` to get the
341-
full collection of forecasts. This is too busy for the space we have
342-
for plotting here.
341+
`autoplot(four_week_ahead, observed_response = cases_deaths)` to get
342+
the full collection of forecasts. This is too busy for the space we
343+
have for plotting here.
343344

344345
[^3]: Note that these are not the same quantiles that we fit when
345346
creating `four_week_ahead`. They are extrapolated from those

man/autoplot-epipred.Rd

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/grf_quantiles.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/step_adjust_latency.Rd

Lines changed: 21 additions & 29 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vignettes/backtesting.Rmd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,8 @@ p1 <- # first plotting the forecasts as bands, lines and points
337337
geom_vline(
338338
data = percent_cli_data |> filter(geo_value == geo_choose) |> select(-version_faithful),
339339
aes(color = factor(version), xintercept = version),
340-
lty = 2) +
340+
lty = 2
341+
) +
341342
# the underlying data
342343
geom_line(
343344
data = plotting_data |> filter(geo_value == geo_choose),

0 commit comments

Comments
 (0)