From df750013d9f7f6c8e7eef4c4faf20bfe50c0bb80 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 22 Apr 2021 08:52:34 -0500 Subject: [PATCH 01/11] Implement tidyverse recycling rules for str_c() --- DESCRIPTION | 1 + R/c.r | 26 ++++++++++++++++-------- man/str_c.Rd | 22 ++++++++++++++------ tests/testthat/_snaps/c.md | 7 +++++++ tests/testthat/{test-join.r => test-c.r} | 5 +++++ 5 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 tests/testthat/_snaps/c.md rename tests/testthat/{test-join.r => test-c.r} (70%) diff --git a/DESCRIPTION b/DESCRIPTION index 7ed51f39..ee66b4eb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: magrittr, rlang, stringi (>= 1.5.3), + vctrs, withr Suggests: covr, diff --git a/R/c.r b/R/c.r index 02c8f4ca..663761d0 100644 --- a/R/c.r +++ b/R/c.r @@ -1,9 +1,7 @@ #' Join multiple strings into a single string #' -#' @description -#' `r lifecycle::badge("superseded")` -#' -#' `str_c()` is no longer needed; please use `paste0()` instead. +#' `str_c()` is a version of `paste0()` that uses tidyverse recycling and +#' `NA` rules. #' #' @details #' @@ -14,9 +12,10 @@ #' is collapsed into a single string. If non-`NULL` that string is inserted at #' the end of each row, and the entire matrix collapsed to a single string. #' -#' @param ... One or more character vectors. Zero length arguments -#' are removed. Short arguments are recycled to the length of the -#' longest. +#' @param ... One or more character vectors. +#' +#' `NULL`s are removed; scalar inputs (vectors of length 1) are recycled to +#' the common length of vector inputs. #' #' Like most other R functions, missing values are "infectious": whenever #' a missing value is combined with another string the result will always @@ -41,11 +40,22 @@ #' str_c(letters, collapse = "") #' str_c(letters, collapse = ", ") #' +#' # Differences from paste() ---------------------- +#' # Missing inputs give missing outputs +#' str_c(c("a", NA, "b"), "-d") +#' paste0(c("a", NA, "b"), "-d") +#' +#' # Uses tidyverse recycling rules +#' \dontrun{str_c(1:2, 1:3)} # errors +#' paste0(1:2, 1:3) +#' #' # Missing inputs give missing outputs #' str_c(c("a", NA, "b"), "-d") #' # Use str_replace_NA to display literal NAs: #' str_c(str_replace_na(c("a", NA, "b")), "-d") +#' #' @import stringi str_c <- function(..., sep = "", collapse = NULL) { - stri_c(..., sep = sep, collapse = collapse, ignore_null = TRUE) + dots <- vctrs::vec_recycle_common(...) + exec(stri_c, !!!dots, sep = sep, collapse = collapse, ignore_null = TRUE) } diff --git a/man/str_c.Rd b/man/str_c.Rd index 289ed0cf..ebf612e3 100644 --- a/man/str_c.Rd +++ b/man/str_c.Rd @@ -7,9 +7,10 @@ str_c(..., sep = "", collapse = NULL) } \arguments{ -\item{...}{One or more character vectors. Zero length arguments -are removed. Short arguments are recycled to the length of the -longest. +\item{...}{One or more character vectors. + +\code{NULL}s are removed; scalar inputs (vectors of length 1) are recycled to +the common length of vector inputs. Like most other R functions, missing values are "infectious": whenever a missing value is combined with another string the result will always @@ -27,9 +28,8 @@ length equal to the longest input string. If \code{collapse} is non-NULL, a character vector of length 1. } \description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} - -\code{str_c()} is no longer needed; please use \code{paste0()} instead. +\code{str_c()} is a version of \code{paste0()} that uses tidyverse recycling and +\code{NA} rules. } \details{ To understand how \code{str_c} works, you need to imagine that you are building up @@ -48,10 +48,20 @@ str_c(letters[-26], " comes before ", letters[-1]) str_c(letters, collapse = "") str_c(letters, collapse = ", ") +# Differences from paste() ---------------------- +# Missing inputs give missing outputs +str_c(c("a", NA, "b"), "-d") +paste0(c("a", NA, "b"), "-d") + +# Uses tidyverse recycling rules +\dontrun{str_c(1:2, 1:3)} # errors +paste0(1:2, 1:3) + # Missing inputs give missing outputs str_c(c("a", NA, "b"), "-d") # Use str_replace_NA to display literal NAs: str_c(str_replace_na(c("a", NA, "b")), "-d") + } \seealso{ \code{\link[=paste]{paste()}} for equivalent base R functionality, and diff --git a/tests/testthat/_snaps/c.md b/tests/testthat/_snaps/c.md new file mode 100644 index 00000000..d3069185 --- /dev/null +++ b/tests/testthat/_snaps/c.md @@ -0,0 +1,7 @@ +# obeys tidyverse recycling rules + + Code + str_c(1:2, 1:3) + Error + Can't recycle `..1` (size 2) to match `..2` (size 3). + diff --git a/tests/testthat/test-join.r b/tests/testthat/test-c.r similarity index 70% rename from tests/testthat/test-join.r rename to tests/testthat/test-c.r index fae1bc85..0645b1cd 100644 --- a/tests/testthat/test-join.r +++ b/tests/testthat/test-c.r @@ -6,6 +6,11 @@ test_that("basic case works", { expect_equal(str_c(test, collapse = ""), "abc") }) +test_that("obeys tidyverse recycling rules", { + expect_equal(str_c("x", character()), character()) + expect_snapshot(str_c(1:2, 1:3), error = TRUE) +}) + test_that("NULLs are dropped", { test <- letters[1:3] From df000950bc02a01eac877f328335df0411025cf9 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 22 Apr 2021 11:45:07 -0500 Subject: [PATCH 02/11] Prototype recycling in str_detect() --- R/detect.r | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/R/detect.r b/R/detect.r index a7ad4f0c..5ea8f321 100644 --- a/R/detect.r +++ b/R/detect.r @@ -41,15 +41,21 @@ #' # Returns TRUE if the pattern do NOT match #' str_detect(fruit, "^p", negate = TRUE) str_detect <- function(string, pattern, negate = FALSE) { + args <- str_recycle(string, pattern) + switch(type(pattern), empty = , - bound = str_count(string, pattern) > 0 & !negate, - fixed = stri_detect_fixed(string, pattern, negate = negate, opts_fixed = opts(pattern)), - coll = stri_detect_coll(string, pattern, negate = negate, opts_collator = opts(pattern)), - regex = stri_detect_regex(string, pattern, negate = negate, opts_regex = opts(pattern)) + bound = str_count(args$string, args$pattern) > 0 & !negate, + fixed = stri_detect_fixed(args$string, args$pattern, negate = negate, opts_fixed = opts(pattern)), + coll = stri_detect_coll(args$string, args$pattern, negate = negate, opts_collator = opts(pattern)), + regex = stri_detect_regex(args$string, args$pattern, negate = negate, opts_regex = opts(pattern)) ) } +str_recycle <- function(string, pattern, replacement = NULL) { + vctrs::vec_recycle_common(string = string, pattern = pattern, replacement = replacement) +} + #' Detect the presence or absence of a pattern at the beginning or end of a #' string. #' From fb6ad1a9ffa6ac2eeb3a8a8edde6e211865a3936 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Sun, 25 Apr 2021 07:28:22 -0500 Subject: [PATCH 03/11] Apply rules to all detect functions --- R/detect.r | 40 ++++++++++++++++----------------- R/utils.R | 5 +++++ tests/testthat/_snaps/detect.md | 19 ++++++++++++++++ tests/testthat/test-detect.r | 9 ++++++++ 4 files changed, 52 insertions(+), 21 deletions(-) create mode 100644 tests/testthat/_snaps/detect.md diff --git a/R/detect.r b/R/detect.r index 5ea8f321..a9701d41 100644 --- a/R/detect.r +++ b/R/detect.r @@ -52,10 +52,6 @@ str_detect <- function(string, pattern, negate = FALSE) { ) } -str_recycle <- function(string, pattern, replacement = NULL) { - vctrs::vec_recycle_common(string = string, pattern = pattern, replacement = replacement) -} - #' Detect the presence or absence of a pattern at the beginning or end of a #' string. #' @@ -81,16 +77,16 @@ str_recycle <- function(string, pattern, replacement = NULL) { #' str_ends(fruit, "e") #' str_ends(fruit, "e", negate = TRUE) str_starts <- function(string, pattern, negate = FALSE) { - switch( - type(pattern), + args <- str_recycle(string, pattern) + + switch(type(pattern), empty = , bound = stop("boundary() patterns are not supported."), - fixed = stri_startswith_fixed(string, pattern, negate = negate, opts_fixed = opts(pattern)), - coll = stri_startswith_coll(string, pattern, negate = negate, opts_collator = opts(pattern)), + fixed = stri_startswith_fixed(args$string, args$pattern, negate = negate, opts_fixed = opts(pattern)), + coll = stri_startswith_coll(args$string, args$pattern, negate = negate, opts_collator = opts(pattern)), regex = { - pattern2 <- paste0("^(", pattern, ")") - attributes(pattern2) <- attributes(pattern) - str_detect(string, pattern2, negate) + args$pattern <- paste0("^(", args$pattern, ")") + stri_detect_regex(args$string, args$pattern, negate = negate, opts_regex = opts(pattern)) } ) } @@ -98,16 +94,17 @@ str_starts <- function(string, pattern, negate = FALSE) { #' @rdname str_starts #' @export str_ends <- function(string, pattern, negate = FALSE) { + args <- str_recycle(string, pattern) + switch(type(pattern), - empty = , - bound = stop("boundary() patterns are not supported."), - fixed = stri_endswith_fixed(string, pattern, negate = negate, opts_fixed = opts(pattern)), - coll = stri_endswith_coll(string, pattern, negate = negate, opts_collator = opts(pattern)), - regex = { - pattern2 <- paste0("(", pattern, ")$") - attributes(pattern2) <- attributes(pattern) - str_detect(string, pattern2, negate) - } + empty = , + bound = stop("boundary() patterns are not supported."), + fixed = stri_endswith_fixed(args$string, args$pattern, negate = negate, opts_fixed = opts(pattern)), + coll = stri_endswith_coll(args$string, args$pattern, negate = negate, opts_collator = opts(pattern)), + regex = { + args$pattern <- paste0("(", args$pattern, ")$") + stri_detect_regex(args$string, args$pattern, negate = negate, opts_regex = opts(pattern)) + } ) } @@ -141,7 +138,8 @@ str_like <- function(string, pattern, ignore_case = TRUE) { } pattern <- regex(like_to_regex(pattern), ignore_case = ignore_case) - stri_detect_regex(string, pattern, opts_regex = opts(pattern)) + args <- str_recycle(string, pattern) + stri_detect_regex(args$string, args$pattern, opts_regex = opts(pattern)) } like_to_regex <- function(pattern) { diff --git a/R/utils.R b/R/utils.R index 036dd307..08bc9cf7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -7,3 +7,8 @@ #' @importFrom magrittr %>% #' @usage lhs \%>\% rhs NULL + +str_recycle <- function(string, pattern, replacement = NULL) { + vctrs::vec_recycle_common(string = string, pattern = pattern, replacement = replacement) +} + diff --git a/tests/testthat/_snaps/detect.md b/tests/testthat/_snaps/detect.md new file mode 100644 index 00000000..2bd56357 --- /dev/null +++ b/tests/testthat/_snaps/detect.md @@ -0,0 +1,19 @@ +# functions use tidyverse recycling rules + + Code + str_detect(1:2, 1:3) + Error + Can't recycle `string` (size 2) to match `pattern` (size 3). + Code + str_starts(1:2, 1:3) + Error + Can't recycle `string` (size 2) to match `pattern` (size 3). + Code + str_ends(1:2, 1:3) + Error + Can't recycle `string` (size 2) to match `pattern` (size 3). + Code + str_like(1:2, c("a", "b", "c")) + Error + Can't recycle `string` (size 2) to match `pattern` (size 3). + diff --git a/tests/testthat/test-detect.r b/tests/testthat/test-detect.r index ce6db5dd..af156bf2 100644 --- a/tests/testthat/test-detect.r +++ b/tests/testthat/test-detect.r @@ -56,6 +56,15 @@ test_that("str_ends works", { expect_false(str_ends("ab", "c|a")) }) +test_that("functions use tidyverse recycling rules", { + expect_snapshot(error = TRUE, { + str_detect(1:2, 1:3) + str_starts(1:2, 1:3) + str_ends(1:2, 1:3) + str_like(1:2, c("a", "b", "c")) + }) +}) + # str_like ---------------------------------------------------------------- From 9d159fa6e24c14b08d1b103899392074360c43c1 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Sun, 25 Apr 2021 07:50:08 -0500 Subject: [PATCH 04/11] Recycling from conv.R to match.R --- R/conv.R | 4 ++++ R/count.r | 12 +++++++----- R/dup.r | 3 ++- R/extract.r | 24 ++++++++++++++---------- R/flatten.R | 3 +++ R/locate.r | 23 +++++++++++++---------- R/match.r | 20 ++++++++++++++++---- tests/testthat/test-conv.R | 4 ++++ tests/testthat/test-count.r | 4 ++++ tests/testthat/test-dup.r | 4 ++++ tests/testthat/test-extract.r | 12 ++++++++++++ tests/testthat/test-flatten.R | 4 ++++ tests/testthat/test-locate.r | 5 +++++ tests/testthat/test-match.r | 11 +++++++++++ 14 files changed, 103 insertions(+), 30 deletions(-) diff --git a/R/conv.R b/R/conv.R index de14e80b..42e5871a 100644 --- a/R/conv.R +++ b/R/conv.R @@ -13,5 +13,9 @@ #' str_conv(x, "ISO-8859-2") # Polish "a with ogonek" #' str_conv(x, "ISO-8859-1") # Plus-minus str_conv <- function(string, encoding) { + if (!is_string(encoding)) { + abort("`encoding` must be a single string.") + } + stri_conv(string, encoding, "UTF-8") } diff --git a/R/count.r b/R/count.r index 2febc3a7..78ec765f 100644 --- a/R/count.r +++ b/R/count.r @@ -21,11 +21,13 @@ #' str_count(c("a.", "...", ".a.a"), ".") #' str_count(c("a.", "...", ".a.a"), fixed(".")) str_count <- function(string, pattern = "") { + args <- str_recycle(string, pattern) + switch(type(pattern), - empty = stri_count_boundaries(string, opts_brkiter = opts(pattern)), - bound = stri_count_boundaries(string, opts_brkiter = opts(pattern)), - fixed = stri_count_fixed(string, pattern, opts_fixed = opts(pattern)), - coll = stri_count_coll(string, pattern, opts_collator = opts(pattern)), - regex = stri_count_regex(string, pattern, opts_regex = opts(pattern)) + empty = , + bound = stri_count_boundaries(args$string, opts_brkiter = opts(pattern)), + fixed = stri_count_fixed(args$string, args$pattern, opts_fixed = opts(pattern)), + coll = stri_count_coll(args$string, args$pattern, opts_collator = opts(pattern)), + regex = stri_count_regex(args$string, args$pattern, opts_regex = opts(pattern)) ) } diff --git a/R/dup.r b/R/dup.r index 910e74e9..186be642 100644 --- a/R/dup.r +++ b/R/dup.r @@ -12,5 +12,6 @@ #' str_dup(fruit, 1:3) #' str_c("ba", str_dup("na", 0:5)) str_dup <- function(string, times) { - stri_dup(string, times) + args <- vctrs::vec_recycle_common(string = string, times = times) + stri_dup(args$string, args$times) } diff --git a/R/extract.r b/R/extract.r index 3b6b4524..d1f625a7 100644 --- a/R/extract.r +++ b/R/extract.r @@ -28,28 +28,32 @@ #' # Extract all words #' str_extract_all("This is, suprisingly, a sentence.", boundary("word")) str_extract <- function(string, pattern) { + args <- str_recycle(string, pattern) + switch(type(pattern), - empty = stri_extract_first_boundaries(string, pattern, opts_brkiter = opts(pattern)), - bound = stri_extract_first_boundaries(string, pattern, opts_brkiter = opts(pattern)), - fixed = stri_extract_first_fixed(string, pattern, opts_fixed = opts(pattern)), - coll = stri_extract_first_coll(string, pattern, opts_collator = opts(pattern)), - regex = stri_extract_first_regex(string, pattern, opts_regex = opts(pattern)) + empty = stri_extract_first_boundaries(args$string, args$pattern, opts_brkiter = opts(pattern)), + bound = stri_extract_first_boundaries(args$string, args$pattern, opts_brkiter = opts(pattern)), + fixed = stri_extract_first_fixed(args$string, args$pattern, opts_fixed = opts(pattern)), + coll = stri_extract_first_coll(args$string, args$pattern, opts_collator = opts(pattern)), + regex = stri_extract_first_regex(args$string, args$pattern, opts_regex = opts(pattern)) ) } #' @rdname str_extract #' @export str_extract_all <- function(string, pattern, simplify = FALSE) { + args <- str_recycle(string, pattern) + switch(type(pattern), - empty = stri_extract_all_boundaries(string, pattern, + empty = stri_extract_all_boundaries(args$string, args$pattern, simplify = simplify, omit_no_match = TRUE, opts_brkiter = opts(pattern)), - bound = stri_extract_all_boundaries(string, pattern, + bound = stri_extract_all_boundaries(args$string, args$pattern, simplify = simplify, omit_no_match = TRUE, opts_brkiter = opts(pattern)), - fixed = stri_extract_all_fixed(string, pattern, + fixed = stri_extract_all_fixed(args$string, args$pattern, simplify = simplify, omit_no_match = TRUE, opts_fixed = opts(pattern)), - coll = stri_extract_all_coll(string, pattern, + coll = stri_extract_all_coll(args$string, args$pattern, simplify = simplify, omit_no_match = TRUE, opts_collator = opts(pattern)), - regex = stri_extract_all_regex(string, pattern, + regex = stri_extract_all_regex(args$string, args$pattern, simplify = simplify, omit_no_match = TRUE, opts_regex = opts(pattern)) ) } diff --git a/R/flatten.R b/R/flatten.R index 163b3859..797dbc05 100644 --- a/R/flatten.R +++ b/R/flatten.R @@ -8,5 +8,8 @@ #' str_flatten(letters) #' str_flatten(letters, "-") str_flatten <- function(string, collapse = "") { + if (!is_string(collapse)) { + abort("`collapse` must be a single string.") + } stri_flatten(string, collapse = collapse) } diff --git a/R/locate.r b/R/locate.r index efb45509..a1704e62 100644 --- a/R/locate.r +++ b/R/locate.r @@ -26,26 +26,29 @@ #' # Find location of every character #' str_locate_all(fruit, "") str_locate <- function(string, pattern) { + args <- str_recycle(string, pattern) + switch(type(pattern), - empty = stri_locate_first_boundaries(string, opts_brkiter = opts(pattern)), - bound = stri_locate_first_boundaries(string, opts_brkiter = opts(pattern)), - fixed = stri_locate_first_fixed(string, pattern, opts_fixed = opts(pattern)), - coll = stri_locate_first_coll(string, pattern, opts_collator = opts(pattern)), - regex = stri_locate_first_regex(string, pattern, opts_regex = opts(pattern)) + empty = , + bound = stri_locate_first_boundaries(args$string, opts_brkiter = opts(pattern)), + fixed = stri_locate_first_fixed(args$string, args$pattern, opts_fixed = opts(pattern)), + coll = stri_locate_first_coll(args$string, args$pattern, opts_collator = opts(pattern)), + regex = stri_locate_first_regex(args$string, args$pattern, opts_regex = opts(pattern)) ) } #' @rdname str_locate #' @export str_locate_all <- function(string, pattern) { + args <- str_recycle(string, pattern) opts <- opts(pattern) switch(type(pattern), - empty = stri_locate_all_boundaries(string, omit_no_match = TRUE, opts_brkiter = opts), - bound = stri_locate_all_boundaries(string, omit_no_match = TRUE, opts_brkiter = opts), - fixed = stri_locate_all_fixed(string, pattern, omit_no_match = TRUE, opts_fixed = opts), - regex = stri_locate_all_regex(string, pattern, omit_no_match = TRUE, opts_regex = opts), - coll = stri_locate_all_coll(string, pattern, omit_no_match = TRUE, opts_collator = opts) + empty = , + bound = stri_locate_all_boundaries(args$string, omit_no_match = TRUE, opts_brkiter = opts), + fixed = stri_locate_all_fixed(args$string, args$pattern, omit_no_match = TRUE, opts_fixed = opts), + regex = stri_locate_all_regex(args$string, args$pattern, omit_no_match = TRUE, opts_regex = opts), + coll = stri_locate_all_coll(args$string, args$pattern, omit_no_match = TRUE, opts_collator = opts) ) } diff --git a/R/match.r b/R/match.r index 968098d7..f442b224 100644 --- a/R/match.r +++ b/R/match.r @@ -38,8 +38,14 @@ str_match <- function(string, pattern) { stop("Can only match regular expressions", call. = FALSE) } - stri_match_first_regex(string, - pattern, + args <- str_recycle(string, pattern) + if (length(args$pattern) == 0) { + # In order to get correctly sized output + args$pattern <- pattern + } + + stri_match_first_regex(args$string, + args$pattern, opts_regex = opts(pattern) ) } @@ -51,8 +57,14 @@ str_match_all <- function(string, pattern) { stop("Can only match regular expressions", call. = FALSE) } - stri_match_all_regex(string, - pattern, + args <- str_recycle(string, pattern) + if (length(args$pattern) == 0) { + # In order to get correctly sized output + args$pattern <- pattern + } + + stri_match_all_regex(args$string, + args$pattern, omit_no_match = TRUE, opts_regex = opts(pattern) ) diff --git a/tests/testthat/test-conv.R b/tests/testthat/test-conv.R index b5eed416..44b35937 100644 --- a/tests/testthat/test-conv.R +++ b/tests/testthat/test-conv.R @@ -4,3 +4,7 @@ test_that("encoding conversion works", { x <- rawToChar(as.raw(177)) expect_equal(str_conv(x, "latin1"), "±") }) + +test_that("check encoding argument", { + expect_error(str_conv("A", c("ISO-8859-1", "ISO-8859-2")), "single string") +}) diff --git a/tests/testthat/test-count.r b/tests/testthat/test-count.r index d26f2f15..781b0200 100644 --- a/tests/testthat/test-count.r +++ b/tests/testthat/test-count.r @@ -5,3 +5,7 @@ test_that("counts are as expected", { expect_equal(str_count(fruit, "e"), c(1, 0, 1, 2)) expect_equal(str_count(fruit, c("a", "b", "p", "n")), c(1, 1, 1, 1)) }) + +test_that("uses tidyverse recycling rules", { + expect_error(str_count(1:2, 1:3), class = "vctrs_error_incompatible_size") +}) diff --git a/tests/testthat/test-dup.r b/tests/testthat/test-dup.r index b3d41ed9..2872ae37 100644 --- a/tests/testthat/test-dup.r +++ b/tests/testthat/test-dup.r @@ -9,3 +9,7 @@ test_that("0 duplicates equals empty string", { expect_equal(str_dup("a", 0), "") expect_equal(str_dup(c("a", "b"), 0), rep("", 2)) }) + +test_that("uses tidyverse recycling rules", { + expect_error(str_dup(1:2, 1:3), class = "vctrs_error_incompatible_size") +}) diff --git a/tests/testthat/test-extract.r b/tests/testthat/test-extract.r index 4127843e..fec3e0a4 100644 --- a/tests/testthat/test-extract.r +++ b/tests/testthat/test-extract.r @@ -13,6 +13,18 @@ test_that("single pattern extracted correctly", { }) +test_that("uses tidyverse recycling rules", { + expect_error( + str_extract(c("a", "b"), c("a", "b", "c")), + class = "vctrs_error_incompatible_size" + ) + expect_error( + str_extract_all(c("a", "b"), c("a", "b", "c")), + class = "vctrs_error_incompatible_size" + ) +}) + + test_that("no match yields empty vector", { expect_equal(str_extract_all("a", "b")[[1]], character()) }) diff --git a/tests/testthat/test-flatten.R b/tests/testthat/test-flatten.R index 234cd70b..d3e21777 100644 --- a/tests/testthat/test-flatten.R +++ b/tests/testthat/test-flatten.R @@ -1,3 +1,7 @@ test_that("equivalent to paste with collapse", { expect_equal(str_flatten(letters), paste0(letters, collapse = "")) }) + +test_that("collapse must be single string", { + expect_error(str_flatten("A", c("a", "b")), "single string") +}) diff --git a/tests/testthat/test-locate.r b/tests/testthat/test-locate.r index 2d5e81a2..b429c384 100644 --- a/tests/testthat/test-locate.r +++ b/tests/testthat/test-locate.r @@ -5,6 +5,11 @@ test_that("basic location matching works", { expect_equal(str_locate("abc", ".+")[1, ], c(start = 1, end = 3)) }) +test_that("uses tidyverse recycling rules", { + expect_error(str_locate(1:2, 1:3), class = "vctrs_error_incompatible_size") + expect_error(str_locate_all(1:2, 1:3), class = "vctrs_error_incompatible_size") +}) + test_that("locations are integers", { strings <- c("a b c", "d e f") expect_true(is.integer(str_locate(strings, "[a-z]"))) diff --git a/tests/testthat/test-match.r b/tests/testthat/test-match.r index 605589ae..146a118a 100644 --- a/tests/testthat/test-match.r +++ b/tests/testthat/test-match.r @@ -68,3 +68,14 @@ test_that("match and match_all fail when pattern is not a regex", { expect_error(str_match(phones, fixed("3"))) expect_error(str_match_all(phones, coll("9"))) }) + +test_that("uses tidyverse recycling rules", { + expect_error( + str_match(c("a", "b"), c("a", "b", "c")), + class = "vctrs_error_incompatible_size" + ) + expect_error( + str_match_all(c("a", "b"), c("a", "b", "c")), + class = "vctrs_error_incompatible_size" + ) +}) From 7454e50852036f4f869770ee981b6d082bf52dbf Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Sun, 25 Apr 2021 08:24:28 -0500 Subject: [PATCH 05/11] Simpler strategy since stringi already does the recycling we expect --- R/count.r | 10 +++++----- R/detect.r | 34 +++++++++++++++++----------------- R/extract.r | 24 ++++++++++++------------ R/locate.r | 20 ++++++++++---------- R/match.r | 22 ++++++---------------- R/pad.r | 8 +++++--- R/replace.r | 4 ++++ R/utils.R | 10 ++++++++-- tests/testthat/test-pad.r | 11 +++++++++++ 9 files changed, 78 insertions(+), 65 deletions(-) diff --git a/R/count.r b/R/count.r index 78ec765f..2d4399c3 100644 --- a/R/count.r +++ b/R/count.r @@ -21,13 +21,13 @@ #' str_count(c("a.", "...", ".a.a"), ".") #' str_count(c("a.", "...", ".a.a"), fixed(".")) str_count <- function(string, pattern = "") { - args <- str_recycle(string, pattern) + check_lengths(string, pattern) switch(type(pattern), empty = , - bound = stri_count_boundaries(args$string, opts_brkiter = opts(pattern)), - fixed = stri_count_fixed(args$string, args$pattern, opts_fixed = opts(pattern)), - coll = stri_count_coll(args$string, args$pattern, opts_collator = opts(pattern)), - regex = stri_count_regex(args$string, args$pattern, opts_regex = opts(pattern)) + bound = stri_count_boundaries(string, opts_brkiter = opts(pattern)), + fixed = stri_count_fixed(string, pattern, opts_fixed = opts(pattern)), + coll = stri_count_coll(string, pattern, opts_collator = opts(pattern)), + regex = stri_count_regex(string, pattern, opts_regex = opts(pattern)) ) } diff --git a/R/detect.r b/R/detect.r index a9701d41..28889f05 100644 --- a/R/detect.r +++ b/R/detect.r @@ -41,14 +41,14 @@ #' # Returns TRUE if the pattern do NOT match #' str_detect(fruit, "^p", negate = TRUE) str_detect <- function(string, pattern, negate = FALSE) { - args <- str_recycle(string, pattern) + check_lengths(string, pattern) switch(type(pattern), empty = , - bound = str_count(args$string, args$pattern) > 0 & !negate, - fixed = stri_detect_fixed(args$string, args$pattern, negate = negate, opts_fixed = opts(pattern)), - coll = stri_detect_coll(args$string, args$pattern, negate = negate, opts_collator = opts(pattern)), - regex = stri_detect_regex(args$string, args$pattern, negate = negate, opts_regex = opts(pattern)) + bound = str_count(string, pattern) > 0 & !negate, + fixed = stri_detect_fixed(string, pattern, negate = negate, opts_fixed = opts(pattern)), + coll = stri_detect_coll(string, pattern, negate = negate, opts_collator = opts(pattern)), + regex = stri_detect_regex(string, pattern, negate = negate, opts_regex = opts(pattern)) ) } @@ -77,16 +77,16 @@ str_detect <- function(string, pattern, negate = FALSE) { #' str_ends(fruit, "e") #' str_ends(fruit, "e", negate = TRUE) str_starts <- function(string, pattern, negate = FALSE) { - args <- str_recycle(string, pattern) + check_lengths(string, pattern) switch(type(pattern), empty = , bound = stop("boundary() patterns are not supported."), - fixed = stri_startswith_fixed(args$string, args$pattern, negate = negate, opts_fixed = opts(pattern)), - coll = stri_startswith_coll(args$string, args$pattern, negate = negate, opts_collator = opts(pattern)), + fixed = stri_startswith_fixed(string, pattern, negate = negate, opts_fixed = opts(pattern)), + coll = stri_startswith_coll(string, pattern, negate = negate, opts_collator = opts(pattern)), regex = { - args$pattern <- paste0("^(", args$pattern, ")") - stri_detect_regex(args$string, args$pattern, negate = negate, opts_regex = opts(pattern)) + pattern2 <- paste0("^(", pattern, ")") + stri_detect_regex(string, pattern2, negate = negate, opts_regex = opts(pattern)) } ) } @@ -94,16 +94,16 @@ str_starts <- function(string, pattern, negate = FALSE) { #' @rdname str_starts #' @export str_ends <- function(string, pattern, negate = FALSE) { - args <- str_recycle(string, pattern) + check_lengths(string, pattern) switch(type(pattern), empty = , bound = stop("boundary() patterns are not supported."), - fixed = stri_endswith_fixed(args$string, args$pattern, negate = negate, opts_fixed = opts(pattern)), - coll = stri_endswith_coll(args$string, args$pattern, negate = negate, opts_collator = opts(pattern)), + fixed = stri_endswith_fixed(string, pattern, negate = negate, opts_fixed = opts(pattern)), + coll = stri_endswith_coll(string, pattern, negate = negate, opts_collator = opts(pattern)), regex = { - args$pattern <- paste0("(", args$pattern, ")$") - stri_detect_regex(args$string, args$pattern, negate = negate, opts_regex = opts(pattern)) + pattern2 <- paste0("(", pattern, ")$") + stri_detect_regex(string, pattern2, negate = negate, opts_regex = opts(pattern)) } ) } @@ -138,8 +138,8 @@ str_like <- function(string, pattern, ignore_case = TRUE) { } pattern <- regex(like_to_regex(pattern), ignore_case = ignore_case) - args <- str_recycle(string, pattern) - stri_detect_regex(args$string, args$pattern, opts_regex = opts(pattern)) + check_lengths(string, pattern) + stri_detect_regex(string, pattern, opts_regex = opts(pattern)) } like_to_regex <- function(pattern) { diff --git a/R/extract.r b/R/extract.r index d1f625a7..02e92965 100644 --- a/R/extract.r +++ b/R/extract.r @@ -28,32 +28,32 @@ #' # Extract all words #' str_extract_all("This is, suprisingly, a sentence.", boundary("word")) str_extract <- function(string, pattern) { - args <- str_recycle(string, pattern) + check_lengths(string, pattern) switch(type(pattern), - empty = stri_extract_first_boundaries(args$string, args$pattern, opts_brkiter = opts(pattern)), - bound = stri_extract_first_boundaries(args$string, args$pattern, opts_brkiter = opts(pattern)), - fixed = stri_extract_first_fixed(args$string, args$pattern, opts_fixed = opts(pattern)), - coll = stri_extract_first_coll(args$string, args$pattern, opts_collator = opts(pattern)), - regex = stri_extract_first_regex(args$string, args$pattern, opts_regex = opts(pattern)) + empty = stri_extract_first_boundaries(string, pattern, opts_brkiter = opts(pattern)), + bound = stri_extract_first_boundaries(string, pattern, opts_brkiter = opts(pattern)), + fixed = stri_extract_first_fixed(string, pattern, opts_fixed = opts(pattern)), + coll = stri_extract_first_coll(string, pattern, opts_collator = opts(pattern)), + regex = stri_extract_first_regex(string, pattern, opts_regex = opts(pattern)) ) } #' @rdname str_extract #' @export str_extract_all <- function(string, pattern, simplify = FALSE) { - args <- str_recycle(string, pattern) + check_lengths(string, pattern) switch(type(pattern), - empty = stri_extract_all_boundaries(args$string, args$pattern, + empty = stri_extract_all_boundaries(string, pattern, simplify = simplify, omit_no_match = TRUE, opts_brkiter = opts(pattern)), - bound = stri_extract_all_boundaries(args$string, args$pattern, + bound = stri_extract_all_boundaries(string, pattern, simplify = simplify, omit_no_match = TRUE, opts_brkiter = opts(pattern)), - fixed = stri_extract_all_fixed(args$string, args$pattern, + fixed = stri_extract_all_fixed(string, pattern, simplify = simplify, omit_no_match = TRUE, opts_fixed = opts(pattern)), - coll = stri_extract_all_coll(args$string, args$pattern, + coll = stri_extract_all_coll(string, pattern, simplify = simplify, omit_no_match = TRUE, opts_collator = opts(pattern)), - regex = stri_extract_all_regex(args$string, args$pattern, + regex = stri_extract_all_regex(string, pattern, simplify = simplify, omit_no_match = TRUE, opts_regex = opts(pattern)) ) } diff --git a/R/locate.r b/R/locate.r index a1704e62..105d3cb1 100644 --- a/R/locate.r +++ b/R/locate.r @@ -26,29 +26,29 @@ #' # Find location of every character #' str_locate_all(fruit, "") str_locate <- function(string, pattern) { - args <- str_recycle(string, pattern) + check_lengths(string, pattern) switch(type(pattern), empty = , - bound = stri_locate_first_boundaries(args$string, opts_brkiter = opts(pattern)), - fixed = stri_locate_first_fixed(args$string, args$pattern, opts_fixed = opts(pattern)), - coll = stri_locate_first_coll(args$string, args$pattern, opts_collator = opts(pattern)), - regex = stri_locate_first_regex(args$string, args$pattern, opts_regex = opts(pattern)) + bound = stri_locate_first_boundaries(string, opts_brkiter = opts(pattern)), + fixed = stri_locate_first_fixed(string, pattern, opts_fixed = opts(pattern)), + coll = stri_locate_first_coll(string, pattern, opts_collator = opts(pattern)), + regex = stri_locate_first_regex(string, pattern, opts_regex = opts(pattern)) ) } #' @rdname str_locate #' @export str_locate_all <- function(string, pattern) { - args <- str_recycle(string, pattern) + check_lengths(string, pattern) opts <- opts(pattern) switch(type(pattern), empty = , - bound = stri_locate_all_boundaries(args$string, omit_no_match = TRUE, opts_brkiter = opts), - fixed = stri_locate_all_fixed(args$string, args$pattern, omit_no_match = TRUE, opts_fixed = opts), - regex = stri_locate_all_regex(args$string, args$pattern, omit_no_match = TRUE, opts_regex = opts), - coll = stri_locate_all_coll(args$string, args$pattern, omit_no_match = TRUE, opts_collator = opts) + bound = stri_locate_all_boundaries(string, omit_no_match = TRUE, opts_brkiter = opts), + fixed = stri_locate_all_fixed(string, pattern, omit_no_match = TRUE, opts_fixed = opts), + regex = stri_locate_all_regex(string, pattern, omit_no_match = TRUE, opts_regex = opts), + coll = stri_locate_all_coll(string, pattern, omit_no_match = TRUE, opts_collator = opts) ) } diff --git a/R/match.r b/R/match.r index f442b224..6cc9c768 100644 --- a/R/match.r +++ b/R/match.r @@ -38,14 +38,9 @@ str_match <- function(string, pattern) { stop("Can only match regular expressions", call. = FALSE) } - args <- str_recycle(string, pattern) - if (length(args$pattern) == 0) { - # In order to get correctly sized output - args$pattern <- pattern - } - - stri_match_first_regex(args$string, - args$pattern, + check_lengths(string, pattern) + stri_match_first_regex(string, + pattern, opts_regex = opts(pattern) ) } @@ -57,14 +52,9 @@ str_match_all <- function(string, pattern) { stop("Can only match regular expressions", call. = FALSE) } - args <- str_recycle(string, pattern) - if (length(args$pattern) == 0) { - # In order to get correctly sized output - args$pattern <- pattern - } - - stri_match_all_regex(args$string, - args$pattern, + check_lengths(string, pattern) + stri_match_all_regex(string, + pattern, omit_no_match = TRUE, opts_regex = opts(pattern) ) diff --git a/R/pad.r b/R/pad.r index 70137a19..8b8c79a7 100644 --- a/R/pad.r +++ b/R/pad.r @@ -27,11 +27,13 @@ #' # Longer strings are returned unchanged #' str_pad("hadley", 3) str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ", use_length = FALSE) { + args <- vctrs::vec_recycle_common(string = string, width = width, pad = pad) + side <- match.arg(side) switch(side, - left = stri_pad_left(string, width, pad = pad, use_length = use_length), - right = stri_pad_right(string, width, pad = pad, use_length = use_length), - both = stri_pad_both(string, width, pad = pad, use_length = use_length) + left = stri_pad_left(args$string, args$width, pad = args$pad, use_length = use_length), + right = stri_pad_right(args$string, args$width, pad = args$pad, use_length = use_length), + both = stri_pad_both(args$string, args$width, pad = args$pad, use_length = use_length) ) } diff --git a/R/replace.r b/R/replace.r index d718a84e..472b75b9 100644 --- a/R/replace.r +++ b/R/replace.r @@ -69,6 +69,8 @@ str_replace <- function(string, pattern, replacement) { return(str_transform(string, pattern, replacement)) } + check_lengths(string, pattern, replacement) + switch(type(pattern), empty = stop("Empty `pattern` not supported", call. = FALSE), bound = stop("Boundary `pattern` not supported", call. = FALSE), @@ -98,6 +100,8 @@ str_replace_all <- function(string, pattern, replacement) { vec <- TRUE } + check_lengths(string, pattern, replacement) + switch(type(pattern), empty = stop("Empty `pattern`` not supported", call. = FALSE), bound = stop("Boundary `pattern` not supported", call. = FALSE), diff --git a/R/utils.R b/R/utils.R index 08bc9cf7..dac04d0e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -8,7 +8,13 @@ #' @usage lhs \%>\% rhs NULL -str_recycle <- function(string, pattern, replacement = NULL) { - vctrs::vec_recycle_common(string = string, pattern = pattern, replacement = replacement) +check_lengths <- function(string, pattern, replacement = NULL) { + # stringi already correctly recycles vectors of length 0 and 1 + # we just want more stringent vctrs checks for other lengths + vctrs::vec_size_common( + string = string, + pattern = pattern, + replacement = replacement + ) } diff --git a/tests/testthat/test-pad.r b/tests/testthat/test-pad.r index 792e655f..e0af5b09 100644 --- a/tests/testthat/test-pad.r +++ b/tests/testthat/test-pad.r @@ -24,3 +24,14 @@ test_that("padding based of length works", { expect_equal(pad(width = 6), " \u4e2d ") expect_equal(pad(width = 5, use_length = TRUE), " \u4e2d ") }) + +test_that("uses tidyverse recycling rules", { + expect_error( + str_pad(c("a", "b"), 1:3), + class = "vctrs_error_incompatible_size" + ) + expect_error( + str_pad(c("a", "b"), 10, pad = c("a", "b", "c")), + class = "vctrs_error_incompatible_size" + ) +}) From 9dadebbe57657c5bd9b2961704ce94c8177d8a76 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Sun, 25 Apr 2021 08:31:27 -0500 Subject: [PATCH 06/11] Finish remaining functions --- R/split.r | 1 + R/sub.r | 4 ++++ R/subset.R | 2 ++ R/trim.R | 2 +- R/trunc.R | 5 ++++- R/word.r | 8 ++++---- R/wrap.r | 3 +++ 7 files changed, 19 insertions(+), 6 deletions(-) diff --git a/R/split.r b/R/split.r index f397e7e8..a519070d 100644 --- a/R/split.r +++ b/R/split.r @@ -41,6 +41,7 @@ #' str_split_n(fruits, " and ", 3) str_split <- function(string, pattern, n = Inf, simplify = FALSE) { if (identical(n, Inf)) n <- -1L + check_lengths(string, pattern) switch(type(pattern), empty = stri_split_boundaries(string, n = n, simplify = simplify, opts_brkiter = opts(pattern)), diff --git a/R/sub.r b/R/sub.r index 2ddea4d7..865b6f6a 100644 --- a/R/sub.r +++ b/R/sub.r @@ -62,6 +62,8 @@ #' str_sub(x4, 1, 2, omit_na = TRUE) <- NA #' x1; x2; x3; x4 str_sub <- function(string, start = 1L, end = -1L) { + vctrs::vec_size_common(string = string, start = start, end = end) + if (is.matrix(start)) { stri_sub(string, from = start) } else { @@ -73,6 +75,8 @@ str_sub <- function(string, start = 1L, end = -1L) { #' @export #' @rdname str_sub "str_sub<-" <- function(string, start = 1L, end = -1L, omit_na = FALSE, value) { + vctrs::vec_size_common(string = string, start = start, end = end) + if (is.matrix(start)) { stri_sub(string, from = start, omit_na = omit_na) <- value } else { diff --git a/R/subset.R b/R/subset.R index ed5ad93c..16c5f625 100644 --- a/R/subset.R +++ b/R/subset.R @@ -30,6 +30,8 @@ #' str_subset(c("a", NA, "b"), ".") #' str_which(c("a", NA, "b"), ".") str_subset <- function(string, pattern, negate = FALSE) { + check_lengths(string, pattern) + switch(type(pattern), empty = , bound = string[str_detect(string, pattern) & !negate], diff --git a/R/trim.R b/R/trim.R index 6022799e..f3ce51e8 100644 --- a/R/trim.R +++ b/R/trim.R @@ -15,7 +15,7 @@ #' str_squish(" String with trailing, middle, and leading white space\t") #' str_squish("\n\nString with excess, trailing and leading white space\n\n") str_trim <- function(string, side = c("both", "left", "right")) { - side <- match.arg(side) + side <- arg_match(side) switch(side, left = stri_trim_left(string), diff --git a/R/trunc.R b/R/trunc.R index d7fe914f..ada0d00d 100644 --- a/R/trunc.R +++ b/R/trunc.R @@ -16,7 +16,10 @@ #' str_trunc <- function(string, width, side = c("right", "left", "center"), ellipsis = "...") { - side <- match.arg(side) + side <- arg_match(side) + if (!is.numeric(width) || length(width) != 1) { + abort("`width` must be a single number") + } too_long <- !is.na(string) & str_length(string) > width width... <- width - str_length(ellipsis) diff --git a/R/word.r b/R/word.r index f0de6bfc..d0316c81 100644 --- a/R/word.r +++ b/R/word.r @@ -27,10 +27,10 @@ #' word(str, 1, sep = fixed('..')) #' word(str, 2, sep = fixed('..')) word <- function(string, start = 1L, end = start, sep = fixed(" ")) { - n <- max(length(string), length(start), length(end)) - string <- rep(string, length.out = n) - start <- rep(start, length.out = n) - end <- rep(end, length.out = n) + args <- vctrs::vec_recycle_common(string = string, start = start, end = end) + string <- args$string + start <- args$start + end <- args$end breaks <- str_locate_all(string, sep) words <- lapply(breaks, invert_match) diff --git a/R/wrap.r b/R/wrap.r index 19a7fbd2..14303204 100644 --- a/R/wrap.r +++ b/R/wrap.r @@ -28,6 +28,9 @@ str_wrap <- function(string, indent = 0, exdent = 0, whitespace_only = TRUE) { + if (!is.numeric(width) || length(width) != 1) { + abort("`width` must be a single number") + } if (width <= 0) width <- 1 out <- stri_wrap(string, width = width, indent = indent, exdent = exdent, From c131149990104487bc912d1fac57b58b27ecd40f Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Sun, 25 Apr 2021 08:34:25 -0500 Subject: [PATCH 07/11] Add news bullets --- NEWS.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/NEWS.md b/NEWS.md index 1ed5a3bc..51fd0134 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,15 @@ # stringr (development version) +* stringr functions now consistently implement the tidyverse recycling rules + (#372). Overall this is a fairly minor change as stringi was already very + close to the tidyverse rules. There are only two major changes: + + * Only vectors of length 1 are recycled: + `str_detect(letters, letters[1:2])` previously worked by now errors. + + * `str_c()` ignores `NULLs`, rather than treating them as length 0 + vectors. + * `str_pad()` gains `use_length` argument to control whether to use the total code point width or the number of code points as "width" of a string (#190). From 0bf518363a7a73cee61fac9000f8972588b10215 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Sun, 25 Apr 2021 08:40:27 -0500 Subject: [PATCH 08/11] Use arg_match & vec_size_common consistently --- R/dup.r | 4 ++-- R/modifiers.r | 2 +- R/pad.r | 11 +++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/R/dup.r b/R/dup.r index 186be642..8b8f5f0c 100644 --- a/R/dup.r +++ b/R/dup.r @@ -12,6 +12,6 @@ #' str_dup(fruit, 1:3) #' str_c("ba", str_dup("na", 0:5)) str_dup <- function(string, times) { - args <- vctrs::vec_recycle_common(string = string, times = times) - stri_dup(args$string, args$times) + vctrs::vec_size_common(string = string, times = times) + stri_dup(string, times) } diff --git a/R/modifiers.r b/R/modifiers.r index d1eb862c..223fd362 100644 --- a/R/modifiers.r +++ b/R/modifiers.r @@ -127,7 +127,7 @@ regex <- function(pattern, ignore_case = FALSE, multiline = FALSE, #' @rdname modifiers boundary <- function(type = c("character", "line_break", "sentence", "word"), skip_word_none = NA, ...) { - type <- match.arg(type) + type <- arg_match(type) if (identical(skip_word_none, NA)) { skip_word_none <- type == "word" diff --git a/R/pad.r b/R/pad.r index 8b8c79a7..ceeae046 100644 --- a/R/pad.r +++ b/R/pad.r @@ -27,13 +27,12 @@ #' # Longer strings are returned unchanged #' str_pad("hadley", 3) str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ", use_length = FALSE) { - args <- vctrs::vec_recycle_common(string = string, width = width, pad = pad) - - side <- match.arg(side) + vctrs::vec_size_common(string = string, width = width, pad = pad) + side <- arg_match(side) switch(side, - left = stri_pad_left(args$string, args$width, pad = args$pad, use_length = use_length), - right = stri_pad_right(args$string, args$width, pad = args$pad, use_length = use_length), - both = stri_pad_both(args$string, args$width, pad = args$pad, use_length = use_length) + left = stri_pad_left(string, width, pad = pad, use_length = use_length), + right = stri_pad_right(string, width, pad = pad, use_length = use_length), + both = stri_pad_both(string, width, pad = pad, use_length = use_length) ) } From fb14852349d853f3aac4f11e15f36410e8ddfe89 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 26 Apr 2021 07:06:24 -0500 Subject: [PATCH 09/11] Rely on stringi recycling in str_c --- R/c.r | 7 +++++-- tests/testthat/_snaps/c.md | 4 ++-- tests/testthat/test-c.r | 12 +++++------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/R/c.r b/R/c.r index 663761d0..e781b1c5 100644 --- a/R/c.r +++ b/R/c.r @@ -56,6 +56,9 @@ #' #' @import stringi str_c <- function(..., sep = "", collapse = NULL) { - dots <- vctrs::vec_recycle_common(...) - exec(stri_c, !!!dots, sep = sep, collapse = collapse, ignore_null = TRUE) + dots <- list(...) + dots <- dots[!map_lgl(dots, is.null)] + vctrs::vec_size_common(!!!dots) + + exec(stri_c, !!!dots, sep = sep, collapse = collapse) } diff --git a/tests/testthat/_snaps/c.md b/tests/testthat/_snaps/c.md index d3069185..d745db8e 100644 --- a/tests/testthat/_snaps/c.md +++ b/tests/testthat/_snaps/c.md @@ -1,7 +1,7 @@ # obeys tidyverse recycling rules Code - str_c(1:2, 1:3) + str_c(c("x", "y"), character()) Error - Can't recycle `..1` (size 2) to match `..2` (size 3). + Can't recycle `..1` (size 2) to match `..2` (size 0). diff --git a/tests/testthat/test-c.r b/tests/testthat/test-c.r index 0645b1cd..2d5a4c77 100644 --- a/tests/testthat/test-c.r +++ b/tests/testthat/test-c.r @@ -7,13 +7,11 @@ test_that("basic case works", { }) test_that("obeys tidyverse recycling rules", { - expect_equal(str_c("x", character()), character()) - expect_snapshot(str_c(1:2, 1:3), error = TRUE) -}) + expect_equal(str_c(), character()) -test_that("NULLs are dropped", { - test <- letters[1:3] + expect_equal(str_c("x", character()), character()) + expect_equal(str_c("x", NULL), "x") - expect_equal(str_c(test, NULL), test) - expect_equal(str_c(test, NULL, "a", sep = " "), c("a a", "b a", "c a")) + expect_snapshot(str_c(c("x", "y"), character()), error = TRUE) + expect_equal(str_c(c("x", "y"), NULL), c("x", "y")) }) From 422b660c923c941657f3d972764e369ecec6df44 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 26 Apr 2021 07:15:12 -0500 Subject: [PATCH 10/11] Tweak news --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7ad40be0..9fc8b441 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,8 +4,8 @@ (#372). Overall this is a fairly minor change as stringi was already very close to the tidyverse rules. There are only two major changes: - * Only vectors of length 1 are recycled: - `str_detect(letters, letters[1:2])` previously worked by now errors. + * Only vectors of length 1 are recycled: previously, + `str_detect(letters, c("x", "y"))` worked, but it now errors. * `str_c()` ignores `NULLs`, rather than treating them as length 0 vectors. From 1d45de9566855675439705bac3c123e40178a3de Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 26 Apr 2021 07:24:35 -0500 Subject: [PATCH 11/11] Polishing str_c() --- NEWS.md | 6 +++-- R/c.r | 49 ++++++++++++++++++++------------------ R/stringr-package.R | 1 + man/str_c.Rd | 43 +++++++++++++++------------------ tests/testthat/_snaps/c.md | 11 +++++++++ tests/testthat/test-c.r | 7 ++++++ 6 files changed, 68 insertions(+), 49 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9fc8b441..b21ed904 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,9 @@ * `str_c()` ignores `NULLs`, rather than treating them as length 0 vectors. + + Additionally, many more non-vectorised arguments now throw errors, + rather than warnings, if supplied a vector. * `str_flatten()` gains a `last` argument that optionally override the final separator (#377). @@ -25,8 +28,7 @@ requires a pattern so you can use it to display strings with special characters. -* `str_c()` and `str_length()` are superseded in favour of `paste0()` and - `nchar()` respectively (#356). +* `str_length()` is superseded in favour of `nchar()` respectively (#356). * `str_wrap()` breaks only at whitespace by default; set `whitespace_only = FALSE` to return to the previous behaviour (#335, @rjpat). diff --git a/R/c.r b/R/c.r index e781b1c5..383575d3 100644 --- a/R/c.r +++ b/R/c.r @@ -1,16 +1,15 @@ #' Join multiple strings into a single string #' -#' `str_c()` is a version of `paste0()` that uses tidyverse recycling and +#' @description +#' `str_c()` combines multiple character vectors into a single character +#' vector. It's very similar to [`paste0()`] but uses tidyverse recycling and #' `NA` rules. #' -#' @details -#' -#' To understand how `str_c` works, you need to imagine that you are building up -#' a matrix of strings. Each input argument forms a column, and is expanded to -#' the length of the longest argument, using the usual recyling rules. The -#' `sep` string is inserted between each column. If collapse is `NULL` each row -#' is collapsed into a single string. If non-`NULL` that string is inserted at -#' the end of each row, and the entire matrix collapsed to a single string. +#' One way to understand how `str_c()` works is picture a 2d matrix of strings, +#' where each argument forms a column. `sep` is inserted between each column, +#' and then each row is combined together into a single string. If `collapse` +#' is set, it's inserted between each row, and then the result is again +#' combined, this time into a single string. #' #' @param ... One or more character vectors. #' @@ -19,16 +18,15 @@ #' #' Like most other R functions, missing values are "infectious": whenever #' a missing value is combined with another string the result will always -#' be missing. Use [str_replace_na()] to convert `NA` to -#' `"NA"` +#' be missing. Use [dplyr::coalesce()] or [str_replace_na()] to convert +#' desired value. #' @param sep String to insert between input vectors. -#' @param collapse Optional string used to combine input vectors into single -#' string. +#' @param collapse Optional string used to combine output into single +#' string. Generally better to use [str_flatten()] if you needed this +#' behaviour. #' @return If `collapse = NULL` (the default) a character vector with -#' length equal to the longest input string. If `collapse` is -#' non-NULL, a character vector of length 1. -#' @seealso [paste()] for equivalent base R functionality, and -#' [stringi::stri_join()] which this function wraps +#' length equal to the longest input. If `collapse` is a string, a character +#' vector of length 1. #' @export #' @keywords internal #' @examples @@ -44,18 +42,23 @@ #' # Missing inputs give missing outputs #' str_c(c("a", NA, "b"), "-d") #' paste0(c("a", NA, "b"), "-d") +#' # Use str_replace_NA to display literal NAs: +#' str_c(str_replace_na(c("a", NA, "b")), "-d") #' #' # Uses tidyverse recycling rules #' \dontrun{str_c(1:2, 1:3)} # errors #' paste0(1:2, 1:3) #' -#' # Missing inputs give missing outputs -#' str_c(c("a", NA, "b"), "-d") -#' # Use str_replace_NA to display literal NAs: -#' str_c(str_replace_na(c("a", NA, "b")), "-d") -#' -#' @import stringi +#' str_c("x", character()) +#' paste0("x", character()) str_c <- function(..., sep = "", collapse = NULL) { + if (!is_string(sep)) { + abort("`sep` must be a single string") + } + if (!is.null(collapse) && !is_string(collapse)) { + abort("`collapse` must be NULL or single string") + } + dots <- list(...) dots <- dots[!map_lgl(dots, is.null)] vctrs::vec_size_common(!!!dots) diff --git a/R/stringr-package.R b/R/stringr-package.R index 97c07e14..0aab7515 100644 --- a/R/stringr-package.R +++ b/R/stringr-package.R @@ -2,6 +2,7 @@ "_PACKAGE" ## usethis namespace: start +#' @import stringi #' @import rlang #' @importFrom glue glue #' @importFrom lifecycle deprecated diff --git a/man/str_c.Rd b/man/str_c.Rd index ebf612e3..95285554 100644 --- a/man/str_c.Rd +++ b/man/str_c.Rd @@ -14,30 +14,30 @@ the common length of vector inputs. Like most other R functions, missing values are "infectious": whenever a missing value is combined with another string the result will always -be missing. Use \code{\link[=str_replace_na]{str_replace_na()}} to convert \code{NA} to -\code{"NA"}} +be missing. Use \code{\link[dplyr:coalesce]{dplyr::coalesce()}} or \code{\link[=str_replace_na]{str_replace_na()}} to convert +desired value.} \item{sep}{String to insert between input vectors.} -\item{collapse}{Optional string used to combine input vectors into single -string.} +\item{collapse}{Optional string used to combine output into single +string. Generally better to use \code{\link[=str_flatten]{str_flatten()}} if you needed this +behaviour.} } \value{ If \code{collapse = NULL} (the default) a character vector with -length equal to the longest input string. If \code{collapse} is -non-NULL, a character vector of length 1. +length equal to the longest input. If \code{collapse} is a string, a character +vector of length 1. } \description{ -\code{str_c()} is a version of \code{paste0()} that uses tidyverse recycling and +\code{str_c()} combines multiple character vectors into a single character +vector. It's very similar to \code{\link[=paste0]{paste0()}} but uses tidyverse recycling and \code{NA} rules. -} -\details{ -To understand how \code{str_c} works, you need to imagine that you are building up -a matrix of strings. Each input argument forms a column, and is expanded to -the length of the longest argument, using the usual recyling rules. The -\code{sep} string is inserted between each column. If collapse is \code{NULL} each row -is collapsed into a single string. If non-\code{NULL} that string is inserted at -the end of each row, and the entire matrix collapsed to a single string. + +One way to understand how \code{str_c()} works is picture a 2d matrix of strings, +where each argument forms a column. \code{sep} is inserted between each column, +and then each row is combined together into a single string. If \code{collapse} +is set, it's inserted between each row, and then the result is again +combined, this time into a single string. } \examples{ str_c("Letter: ", letters) @@ -52,19 +52,14 @@ str_c(letters, collapse = ", ") # Missing inputs give missing outputs str_c(c("a", NA, "b"), "-d") paste0(c("a", NA, "b"), "-d") +# Use str_replace_NA to display literal NAs: +str_c(str_replace_na(c("a", NA, "b")), "-d") # Uses tidyverse recycling rules \dontrun{str_c(1:2, 1:3)} # errors paste0(1:2, 1:3) -# Missing inputs give missing outputs -str_c(c("a", NA, "b"), "-d") -# Use str_replace_NA to display literal NAs: -str_c(str_replace_na(c("a", NA, "b")), "-d") - -} -\seealso{ -\code{\link[=paste]{paste()}} for equivalent base R functionality, and -\code{\link[stringi:stri_join]{stringi::stri_join()}} which this function wraps +str_c("x", character()) +paste0("x", character()) } \keyword{internal} diff --git a/tests/testthat/_snaps/c.md b/tests/testthat/_snaps/c.md index d745db8e..905b9b38 100644 --- a/tests/testthat/_snaps/c.md +++ b/tests/testthat/_snaps/c.md @@ -5,3 +5,14 @@ Error Can't recycle `..1` (size 2) to match `..2` (size 0). +# vectorised arguments error + + Code + str_c(letters, sep = c("a", "b")) + Error + `sep` must be a single string + Code + str_c(letters, collapse = c("a", "b")) + Error + `collapse` must be NULL or single string + diff --git a/tests/testthat/test-c.r b/tests/testthat/test-c.r index 2d5a4c77..a63d98f6 100644 --- a/tests/testthat/test-c.r +++ b/tests/testthat/test-c.r @@ -15,3 +15,10 @@ test_that("obeys tidyverse recycling rules", { expect_snapshot(str_c(c("x", "y"), character()), error = TRUE) expect_equal(str_c(c("x", "y"), NULL), c("x", "y")) }) + +test_that("vectorised arguments error", { + expect_snapshot(error = TRUE, { + str_c(letters, sep = c("a", "b")) + str_c(letters, collapse = c("a", "b")) + }) +})