1212# ' and one of them must be unused. The ECDF will be calculated on the given aesthetic
1313# ' and will be output on the unused one.
1414# '
15+ # ' If the `weight` aesthetic is provided, a weighted ECDF will be computed. In
16+ # ' this case, the ECDF is incremented by `weight / sum(weight)` instead of
17+ # ' `1 / length(x)` for each observation.
18+ # '
1519# ' @inheritParams layer
1620# ' @inheritParams geom_point
1721# ' @param na.rm If `FALSE` (the default), removes missing values with
2024# ' of points to interpolate with.
2125# ' @param pad If `TRUE`, pad the ecdf with additional points (-Inf, 0)
2226# ' and (Inf, 1)
27+ # ' @eval rd_aesthetics("stat", "ecdf")
2328# ' @eval rd_computed_vars(
2429# ' ecdf = "Cumulative density corresponding to `x`.",
2530# ' y = "`r lifecycle::badge('superseded')` For backward compatibility."
2631# ' )
32+ # ' @section Dropped variables:
33+ # ' \describe{
34+ # ' \item{weight}{After calculation, weights of individual observations (if
35+ # ' supplied), are no longer available.}
36+ # ' }
2737# ' @export
2838# ' @examples
2939# ' set.seed(1)
4151# ' # Multiple ECDFs
4252# ' ggplot(df, aes(x, colour = g)) +
4353# ' stat_ecdf()
54+ # '
55+ # ' # Using weighted eCDF
56+ # ' weighted <- data.frame(x = 1:10, weights = c(1:5, 5:1))
57+ # ' plain <- data.frame(x = rep(weighted$x, weighted$weights))
58+ # '
59+ # ' ggplot(plain, aes(x)) +
60+ # ' stat_ecdf(linewidth = 1) +
61+ # ' stat_ecdf(
62+ # ' aes(weight = weights),
63+ # ' data = weighted, colour = "green"
64+ # ' )
4465stat_ecdf <- function (mapping = NULL , data = NULL ,
4566 geom = " step" , position = " identity" ,
4667 ... ,
@@ -74,7 +95,7 @@ stat_ecdf <- function(mapping = NULL, data = NULL,
7495StatEcdf <- ggproto(" StatEcdf" , Stat ,
7596 required_aes = c(" x|y" ),
7697
77- default_aes = aes(x = after_stat(ecdf ), y = after_stat(ecdf )),
98+ default_aes = aes(x = after_stat(ecdf ), y = after_stat(ecdf ), weight = NULL ),
7899
79100 setup_params = function (self , data , params ) {
80101 params $ flipped_aes <- has_flipped_aes(data , params , main_is_orthogonal = FALSE , main_is_continuous = TRUE )
@@ -100,7 +121,7 @@ StatEcdf <- ggproto("StatEcdf", Stat,
100121 if (pad ) {
101122 x <- c(- Inf , x , Inf )
102123 }
103- data_ecdf <- stats :: ecdf (data $ x )(x )
124+ data_ecdf <- wecdf (data $ x , data $ weight )(x )
104125
105126 df_ecdf <- data_frame0(
106127 x = x ,
@@ -110,6 +131,63 @@ StatEcdf <- ggproto("StatEcdf", Stat,
110131 )
111132 df_ecdf $ flipped_aes <- flipped_aes
112133 flip_data(df_ecdf , flipped_aes )
113- }
134+ },
135+
136+ dropped_aes = " weight"
114137)
115138
139+ # Weighted eCDF function
140+ wecdf <- function (x , weights = NULL ) {
141+
142+ weights <- weights %|| % 1
143+ weights <- vec_recycle(weights , length(x ))
144+
145+ # Sort vectors
146+ ord <- order(x , na.last = NA )
147+ x <- x [ord ]
148+ weights <- weights [ord ]
149+
150+ if (any(! is.finite(weights ))) {
151+ cli :: cli_warn(c(paste0(
152+ " The {.field weight} aesthetic does not support non-finite or " ,
153+ " {.code NA} values."
154+ ), " i" = " These weights were replaced by {.val 0}." ))
155+ weights [! is.finite(weights )] <- 0
156+ }
157+
158+ # `total` replaces `length(x)`
159+ total <- sum(weights )
160+
161+ if (abs(total ) < 1000 * .Machine $ double.eps ) {
162+ if (total == 0 ) {
163+ cli :: cli_abort(paste0(
164+ " Cannot compute eCDF when the {.field weight} aesthetic sums up to " ,
165+ " {.val 0}."
166+ ))
167+ }
168+ cli :: cli_warn(c(
169+ " The sum of the {.field weight} aesthetic is close to {.val 0}." ,
170+ " i" = " Computed eCDF might be unstable."
171+ ))
172+ }
173+
174+ # Link each observation to unique value
175+ vals <- unique0(x )
176+ matched <- match(x , vals )
177+
178+ # Instead of tabulating `matched`, as we would for unweighted `ecdf(x)`,
179+ # we sum weights per unique value of `x`
180+ agg_weights <- vapply(
181+ split(weights , matched ),
182+ sum , numeric (1 )
183+ )
184+
185+ # Like `ecdf(x)`, we return an approx function
186+ approxfun(
187+ vals ,
188+ cumsum(agg_weights ) / total ,
189+ method = " constant" ,
190+ yleft = 0 , yright = 1 ,
191+ f = 0 , ties = " ordered"
192+ )
193+ }
0 commit comments