Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# dplyr (development version)

* `if_any()` and `if_all()` are now more consistent in all use cases (#7059, #7077, #7746, @jrwinget). In particular:

* When called with zero inputs, `if_any()` returns `FALSE` and `if_all()` returns `TRUE`.

* When called with one input, both now return logical vectors rather than the original column.

* The result of applying `.fns` now must be a logical vector.

* `tally_n()` creates fully qualified function calls for duckplyr compatibility (#7046).

* `storms` has been updated to include 2023 and 2024 data (#7111, @tomalrussell).
Expand Down Expand Up @@ -101,10 +109,6 @@
* Fixed an issue where duckplyr's ALTREP data frames were being materialized
early due to internal usage of `ncol()` (#7049).

* `if_any()` and `if_all()` are now fully consistent with `any()` and `all()`.
In particular, when called with empty inputs `if_any()` returns `FALSE` and
`if_all()` returns `TRUE` (#7059, @jrwinget).

## Lifecycle changes

### Breaking changes
Expand Down
123 changes: 58 additions & 65 deletions R/across.R
Original file line number Diff line number Diff line change
Expand Up @@ -349,31 +349,21 @@ across <- function(.cols, .fns, ..., .names = NULL, .unpack = FALSE) {
if_any <- function(.cols, .fns, ..., .names = NULL) {
context_local("across_if_fn", "if_any")
context_local("across_frame", current_env())
if_across(`|`, across({{ .cols }}, .fns, ..., .names = .names))
df <- across({{ .cols }}, .fns, ..., .names = .names)
x <- dplyr_new_list(df)
size <- vec_size(df)
dplyr_list_pany(x, size = size)
}

#' @rdname across
#' @export
if_all <- function(.cols, .fns, ..., .names = NULL) {
context_local("across_if_fn", "if_all")
context_local("across_frame", current_env())
if_across(`&`, across({{ .cols }}, .fns, ..., .names = .names))
}

if_across <- function(op, df) {
n <- nrow(df)

if (!length(df)) {
return(TRUE)
}

combine <- function(x, y) {
if (is_null(x)) {
y
} else {
op(x, y)
}
}
reduce(df, combine, .init = NULL)
df <- across({{ .cols }}, .fns, ..., .names = .names)
x <- dplyr_new_list(df)
size <- vec_size(df)
dplyr_list_pall(x, size = size)
}

#' Combine values from multiple columns
Expand Down Expand Up @@ -608,79 +598,82 @@ dplyr_quosures <- function(...) {
quosures
}

# When mutate() or summarise() have an unnamed call to across() at the top level, e.g.
# summarise(across(<...>)) or mutate(across(<...>))
# Expand an `if_any()` or `if_all()` call
#
# a call to top_across(<...>) is evaluated instead.
# top_across() returns a flattened list of expressions along with some
# information about the "current column" for each expression
# in the "columns" attribute:
# Always guaranteed to be 1 quosure in, 1 quosure out, unlike `expand_across()`.
#
# For example with: summarise(across(c(x, y), mean, .names = "mean_{.col}")) top_across() will return
# something like:
# For the dplyr backend, the main reason we expand at all is to evaluate
# tidyselection exactly once (rather than once per group), because tidyselection
# is rather slow.
#
# structure(
# list(mean_x = expr(mean(x)), mean_y = expr(mean(y)))
# columns = c("x", "y")
# )

# Technically this always returns a single quosure but we wrap it in a
# list to follow the pattern in `expand_across()`
# At one point we believed `if_any()` and `if_all()` could be implemented as
# "pure expansion" that would run before dispatching to other backends, like
# dbplyr. In theory this could expand to a chain of `&` and `|` operations that
# dbplyr would already know how to translate (so dbplyr itself would not have to
# know how to implement `if_any()` and `if_all()`), but in practice we need more
# error checking than what `x & y & z` gets us, so we actually expand to a
# vctrs-backed implementation since the "pure expansion" ideas have never played
# out.
expand_if_across <- function(quo) {
quo_data <- attr(quo, "dplyr:::data")
if (!quo_is_call(quo, c("if_any", "if_all"), ns = c("", "dplyr"))) {
return(list(quo))
if (quo_is_call(quo, "if_any", ns = c("", "dplyr"))) {
variant <- "any"
} else if (quo_is_call(quo, "if_all", ns = c("", "dplyr"))) {
variant <- "all"
} else {
# Refuse to expand
return(quo)
}

# `definition` is the same between the two for the purposes of `match.call()`
definition <- if_any

call <- match.call(
definition = if_any,
definition = definition,
call = quo_get_expr(quo),
expand.dots = FALSE,
envir = quo_get_env(quo)
)

if (!is_null(call$...)) {
return(list(quo))
# Refuse to expand
return(quo)
}

if (is_call(call, "if_any")) {
op <- "|"
if (variant == "any") {
if_fn <- "if_any"
empty <- FALSE
dplyr_fn <- "dplyr_list_pany"
} else {
op <- "&"
if_fn <- "if_all"
empty <- TRUE
dplyr_fn <- "dplyr_list_pall"
}

# `expand_across()` will always expand at this point given that we bailed on
# `...` usage early on, which is the only case that would stop expansion.
#
# Set frame here for backtrace truncation. But override error call via
# `local_error_call()` so it refers to the function we're expanding, e.g.
# `if_any()` and not `expand_if_across()`.
context_local("across_if_fn", if_fn)

# Set frame here for backtrace truncation. But override error call
# via `local_error_call()` so it refers to the function we're
# expanding, e.g. `if_any()` and not `expand_if_across()`.
context_local("across_frame", current_env())
local_error_call(call(if_fn))

call[[1]] <- quote(across)
quos <- expand_across(quo_set_expr(quo, call))

# Select all rows if there are no inputs for if_all(),
# but select no rows if there are no inputs for if_any().
if (!length(quos)) {
return(list(quo(!!empty)))
}
expr <- expr({
x <- list(!!!quos)
ns <- asNamespace("dplyr")

combine <- function(x, y) {
if (is_null(x)) {
y
} else {
call(op, x, y)
}
}
Comment on lines -672 to -678
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This previous expansion was written with dbplyr and other backends in mind. We thought the dplyr expansions could be used as is in these other settings, removing some work for them as they would already know how to deal with the bare expansion.

Unfortunately, as the linked issues reveal, these bare expansions don't work that well for the dplyr backend because they are missing things like input validation. If we add these in the expansion, then the original purpose of generic translation is defeated.

We've never actually pushed towards using these expansions in other packages, so although this feels like a step backward, we don't currently lose anything by making the expansion untranslatable.

@DavisVaughan Maybe add a comment about why we still need the expansion at all (to avoid tidyselect getting evaluated on every group).

expr <- reduce(quos, combine, .init = NULL)
# In the evaluation path, `across()` automatically recycles to common size,
# so we must here as well for compatibility. `across()` also returns a 0
# col, 1 row data frame in the case of no inputs so that it will recycle to
# the group size, which we also do here.
size <- ns[["dplyr_list_size_common"]](x, absent = 1L, call = call(!!if_fn))
x <- ns[["dplyr_list_recycle_common"]](x, size = size, call = call(!!if_fn))

ns[[!!dplyr_fn]](x, size = size, error_call = call(!!if_fn))
})

# Use `as_quosure()` instead of `new_quosure()` to avoid rewrapping
# quosure in case of single input
list(as_quosure(expr, env = baseenv()))
new_quosure(expr, env = baseenv())
}

expand_across <- function(quo) {
Expand Down
21 changes: 16 additions & 5 deletions R/filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,16 @@ filter_rows <- function(
mask <- DataMask$new(data, by, "filter", error_call = error_call)
on.exit(mask$forget(), add = TRUE)

dots <- filter_expand(dots, mask = mask, error_call = error_call)
filter_eval(dots, mask = mask, error_call = error_call, user_env = user_env)
# 1:1 mapping between `dots` and `dots_expanded`
dots_expanded <- filter_expand(dots, mask = mask, error_call = error_call)

filter_eval(
dots = dots,
dots_expanded = dots_expanded,
mask = mask,
error_call = error_call,
user_env = user_env
)
}

check_filter <- function(dots, error_call = caller_env()) {
Expand All @@ -174,6 +182,7 @@ check_filter <- function(dots, error_call = caller_env()) {

filter_expand <- function(dots, mask, error_call = caller_env()) {
env_filter <- env()

filter_expand_one <- function(dot, index) {
env_filter$current_expression <- index
dot <- expand_pick(dot, mask)
Expand All @@ -190,13 +199,15 @@ filter_expand <- function(dots, mask, error_call = caller_env()) {
}
)

dots <- list_flatten(dots)

new_quosures(dots)
}

# We evaluate `dots_expanded` but report errors relative to `dots` so that
# we show "In argument: `if_any(c(x, y), is.na)`" rather than its expanded form.
# This works because `dots` and `dots_expanded` have a 1:1 mapping.
filter_eval <- function(
dots,
dots_expanded,
mask,
error_call = caller_env(),
user_env = caller_env(2)
Expand All @@ -218,7 +229,7 @@ filter_eval <- function(
)

out <- withCallingHandlers(
mask$eval_all_filter(dots, env_filter),
mask$eval_all_filter(dots_expanded, env_filter),
error = dplyr_error_handler(
dots = dots,
mask = mask,
Expand Down
47 changes: 47 additions & 0 deletions R/vctrs.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,50 @@ dplyr_vec_ptype_common <- function(chunks, name) {
error = common_handler(name)
)
}

# Version of `vec_size_common()` that takes a list.
# Useful for delaying `!!!` when used within an `expr()` call.
#
# `x` is spliced with `!!!`, so each element of the list is handed to
# `vec_size_common()` as a separate input. `size`, `absent`, and `call` are
# forwarded as `.size`, `.absent`, and `.call` respectively. `absent` defaults
# to `0L` in this wrapper.
#
# `...` sits before the optional arguments so that they can only be matched by
# name; anything supplied through `...` is passed to `check_dots_empty0()`.
dplyr_list_size_common <- function(
x,
...,
size = NULL,
absent = 0L,
call = caller_env()
) {
check_dots_empty0(...)
vec_size_common(!!!x, .size = size, .absent = absent, .call = call)
}

# Version of `vec_recycle_common()` that takes a list.
# Useful for delaying `!!!` when used within an `expr()` call.
#
# `x` is spliced with `!!!`, so each element of the list is handed to
# `vec_recycle_common()` as a separate input. `size` and `call` are forwarded
# as `.size` and `.call` respectively.
#
# `...` sits before the optional arguments so that they can only be matched by
# name; anything supplied through `...` is passed to `check_dots_empty0()`.
dplyr_list_recycle_common <- function(
x,
...,
size = NULL,
call = caller_env()
) {
check_dots_empty0(...)
vec_recycle_common(!!!x, .size = size, .call = call)
}

# Version of `vec_pall()` that takes a list.
#
# `x` is spliced with `!!!`, so each element of the list is handed to
# `vec_pall()` as a separate input. `missing`, `size`, and `error_call` are
# forwarded as `.missing`, `.size`, and `.error_call` respectively. `missing`
# defaults to `NA` in this wrapper.
#
# `...` sits before the optional arguments so that they can only be matched by
# name; anything supplied through `...` is passed to `check_dots_empty0()`.
#
# NOTE(review): presumably `vec_pall()` is the element-wise ("parallel")
# analogue of `all()` across the inputs - confirm against the vctrs docs.
dplyr_list_pall <- function(
x,
...,
missing = NA,
size = NULL,
error_call = caller_env()
) {
check_dots_empty0(...)
vec_pall(!!!x, .missing = missing, .size = size, .error_call = error_call)
}

# Version of `vec_pany()` that takes a list.
#
# `x` is spliced with `!!!`, so each element of the list is handed to
# `vec_pany()` as a separate input. `missing`, `size`, and `error_call` are
# forwarded as `.missing`, `.size`, and `.error_call` respectively. `missing`
# defaults to `NA` in this wrapper.
#
# `...` sits before the optional arguments so that they can only be matched by
# name; anything supplied through `...` is passed to `check_dots_empty0()`.
#
# NOTE(review): presumably `vec_pany()` is the element-wise ("parallel")
# analogue of `any()` across the inputs - confirm against the vctrs docs.
dplyr_list_pany <- function(
x,
...,
missing = NA,
size = NULL,
error_call = caller_env()
) {
check_dots_empty0(...)
vec_pany(!!!x, .missing = missing, .size = size, .error_call = error_call)
}
Loading