@@ -57,9 +57,9 @@ medical insurance claims and the number of new confirmed COVID-19 cases per
5757
5858``` {r grab-epi-data}
5959# Select the `percent_cli` column from the data archive
60- doctor_visits <- archive_cases_dv_subset$DT %>%
61- select(geo_value, time_value, version, percent_cli) %>%
62- tidyr::drop_na(percent_cli) %>%
60+ doctor_visits <- archive_cases_dv_subset$DT |>
61+ select(geo_value, time_value, version, percent_cli) |>
62+ tidyr::drop_na(percent_cli) |>
6363 as_epi_archive(compactify = TRUE)
6464```
6565
@@ -76,8 +76,8 @@ doctor_visits <- pub_covidcast(
7676 geo_values = "ca,fl,ny,tx",
7777 time_values = epirange(20200601, 20211201),
7878 issues = epirange(20200601, 20211201)
79- ) %>%
80- rename(version = issue, percent_cli = value) %>%
79+ ) |>
80+ rename(version = issue, percent_cli = value) |>
8181 as_epi_archive(compactify = TRUE)
8282```
8383
@@ -98,20 +98,20 @@ percent_cli_data <- bind_rows(
9898 # Snapshotted data for the version-faithful forecasts
9999 map(
100100 forecast_dates,
101- ~ doctor_visits %>%
102- epix_as_of(.x) %>%
101+ ~ doctor_visits |>
102+ epix_as_of(.x) |>
103103 mutate(version = .x)
104- ) %>%
105- bind_rows() %>%
104+ ) |>
105+ bind_rows() |>
106106 mutate(version_faithful = TRUE),
107107 # Latest data for the version-faithless forecasts
108- doctor_visits %>%
109- epix_as_of(doctor_visits$versions_end) %>%
108+ doctor_visits |>
109+ epix_as_of(doctor_visits$versions_end) |>
110110 mutate(version_faithful = FALSE)
111111)
112112
113113p0 <-
114- ggplot(data = percent_cli_data %>% filter(geo_value == geo_choose)) +
114+ ggplot(data = percent_cli_data |> filter(geo_value == geo_choose)) +
115115 geom_vline(aes(color = factor(version), xintercept = version), lty = 2) +
116116 geom_line(
117117 aes(x = time_value, y = percent_cli, color = factor(version)),
@@ -153,9 +153,9 @@ of the red time-series to its left.
153153In fact, if we take a snapshot and get the last `time_value`:
154154
155155``` {r}
156- doctor_visits %>%
157- epix_as_of(as.Date("2020-08-01")) %>%
158- pull(time_value) %>%
156+ doctor_visits |>
157+ epix_as_of(as.Date("2020-08-01")) |>
158+ pull(time_value) |>
159159 max()
160160```
161161
@@ -184,14 +184,14 @@ One way to do this is by setting the `.versions` argument for `epix_slide()`:
184184
185185``` {r single_version, warning = FALSE}
186186forecast_date <- as.Date("2021-04-06")
187- forecasts <- doctor_visits %>%
187+ forecasts <- doctor_visits |>
188188 epix_slide(
189189 ~ arx_forecaster(
190190 .x,
191191 outcome = "percent_cli",
192192 predictors = "percent_cli",
193193 args_list = arx_args_list()
194- )$predictions %>%
194+ )$predictions |>
195195 pivot_quantiles_wider(.pred_distn),
196196 .versions = forecast_date
197197 )
@@ -201,12 +201,12 @@ For truth data to compare against, we'll use `epix_as_of()` to generate a snapshot of
201201the archive at the last date[^1].
202202
203203``` {r compare_single_with_result}
204- forecasts %>%
204+ forecasts |>
205205 inner_join(
206- doctor_visits %>%
206+ doctor_visits |>
207207 epix_as_of(doctor_visits$versions_end),
208208 by = c("geo_value", "target_date" = "time_value")
209- ) %>%
209+ ) |>
210210 select(geo_value, forecast_date, .pred, `0.05`, `0.95`, percent_cli)
211211```
212212
@@ -226,9 +226,9 @@ This has the effect of simulating a data set that receives the final version
226226updates every day.
227227
228228``` {r}
229- archive_cases_dv_subset_faux <- doctor_visits %>%
230- epix_as_of(doctor_visits$versions_end) %>%
231- mutate(version = time_value) %>%
229+ archive_cases_dv_subset_faux <- doctor_visits |>
230+ epix_as_of(doctor_visits$versions_end) |>
231+ mutate(version = time_value) |>
232232 as_epi_archive()
233233```
234234
@@ -250,10 +250,10 @@ forecast_wrapper <- function(
250250 lags = c(0:7, 14, 21),
251251 adjust_latency = "extend_ahead"
252252 )
253- )$predictions %>%
253+ )$predictions |>
254254 pivot_quantiles_wider(.pred_distn)
255255 }
256- ) %>%
256+ ) |>
257257 bind_rows()
258258}
259259```
@@ -275,20 +275,20 @@ forecast_dates <- seq(
275275)
276276aheads <- c(1, 7, 14, 21, 28)
277277
278- version_faithless <- archive_cases_dv_subset_faux %>%
278+ version_faithless <- archive_cases_dv_subset_faux |>
279279 epix_slide(
280280 ~ forecast_wrapper(.x, aheads, "percent_cli", "percent_cli"),
281281 .before = 120,
282282 .versions = forecast_dates
283- ) %>%
283+ ) |>
284284 mutate(version_faithful = FALSE)
285285
286- version_faithful <- doctor_visits %>%
286+ version_faithful <- doctor_visits |>
287287 epix_slide(
288288 ~ forecast_wrapper(.x, aheads, "percent_cli", "percent_cli"),
289289 .before = 120,
290290 .versions = forecast_dates
291- ) %>%
291+ ) |>
292292 mutate(version_faithful = TRUE)
293293
294294forecasts <-
@@ -315,8 +315,8 @@ ny), we'll just display the results for two states, California (CA) and Florida
315315
316316``` {r plot_ca_forecasts, warning = FALSE}
317317geo_choose <- "ca"
318- forecasts_filtered <- forecasts %>%
319- filter(geo_value == geo_choose) %>%
318+ forecasts_filtered <- forecasts |>
319+ filter(geo_value == geo_choose) |>
320320 mutate(time_value = version)
321321
322322p1 <- # first plotting the forecasts as bands, lines and points
@@ -325,10 +325,10 @@ p1 <- # first plotting the forecasts as bands, lines and points
325325 geom_line(aes(y = .pred, color = factor(time_value)), linetype = 2L) +
326326 geom_point(aes(y = .pred, color = factor(time_value)), size = 0.75) +
327327 # the forecast date
328- geom_vline(data = percent_cli_data %>% filter(geo_value == geo_choose) %>% select(-version_faithful), aes(color = factor(version), xintercept = version), lty = 2) +
328+ geom_vline(data = percent_cli_data |> filter(geo_value == geo_choose) |> select(-version_faithful), aes(color = factor(version), xintercept = version), lty = 2) +
329329 # the underlying data
330330 geom_line(
331- data = percent_cli_data %>% filter(geo_value == geo_choose),
331+ data = percent_cli_data |> filter(geo_value == geo_choose),
332332 aes(x = time_value, y = percent_cli, color = factor(version)),
333333 inherit.aes = FALSE, na.rm = TRUE
334334 ) +
@@ -341,8 +341,8 @@ p1 <- # first plotting the forecasts as bands, lines and points
341341
342342``` {r plot_fl_forecasts, warning = FALSE}
343343geo_choose <- "fl"
344- forecasts_filtered <- forecasts %>%
345- filter(geo_value == geo_choose) %>%
344+ forecasts_filtered <- forecasts |>
345+ filter(geo_value == geo_choose) |>
346346 mutate(time_value = version)
347347
348348p2 <-
@@ -351,11 +351,11 @@ p2 <-
351351 geom_line(aes(y = .pred, color = factor(time_value)), linetype = 2L) +
352352 geom_point(aes(y = .pred, color = factor(time_value)), size = 0.75) +
353353 geom_vline(
354- data = percent_cli_data %>% filter(geo_value == geo_choose) %>% select(-version_faithful),
354+ data = percent_cli_data |> filter(geo_value == geo_choose) |> select(-version_faithful),
355355 aes(color = factor(version), xintercept = version), lty = 2
356356 ) +
357357 geom_line(
358- data = percent_cli_data %>% filter(geo_value == geo_choose),
358+ data = percent_cli_data |> filter(geo_value == geo_choose),
359359 aes(x = time_value, y = percent_cli, color = factor(version)),
360360 inherit.aes = FALSE, na.rm = TRUE
361361 ) +
397397
398398
399399[^1]: For forecasting a single day like this, we could have actually just used
400- `doctor_visits %>% epix_as_of(forecast_date)` to get the relevant snapshot, and then fed that into `arx_forecaster()` as we did in the [landing
400+ `doctor_visits |> epix_as_of(forecast_date)` to get the relevant snapshot, and then fed that into `arx_forecaster()` as we did in the [landing
401401page](../index.html#motivating-example).
402402
403403
0 commit comments