Skip to content
Merged
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Imports:
magrittr,
rlang,
stringi (>= 1.5.3),
vctrs,
withr
Suggests:
covr,
Expand Down
16 changes: 14 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# stringr (development version)

* stringr functions now consistently implement the tidyverse recycling rules
(#372). Overall this is a fairly minor change as stringi was already very
close to the tidyverse rules. There are only two major changes:

* Only vectors of length 1 are recycled: previously,
`str_detect(letters, c("x", "y"))` worked, but it now errors.

* `str_c()` ignores `NULL`s, rather than treating them as length 0
vectors.

Additionally, many more non-vectorised arguments now throw errors,
rather than warnings, if supplied a vector.

* `str_flatten()` gains a `last` argument that optionally overrides the
final separator (#377).

Expand All @@ -15,8 +28,7 @@
requires a pattern so you can use it to display strings with special
characters.

* `str_c()` and `str_length()` are superseded in favour of `paste0()` and
`nchar()` respectively (#356).
* `str_length()` is superseded in favour of `nchar()` (#356).

* `str_wrap()` breaks only at whitespace by default; set
`whitespace_only = FALSE` to return to the previous behaviour (#335, @rjpat).
Expand Down
62 changes: 39 additions & 23 deletions R/c.r
Original file line number Diff line number Diff line change
@@ -1,35 +1,32 @@
#' Join multiple strings into a single string
#'
#' @description
#' `r lifecycle::badge("superseded")`
#' `str_c()` combines multiple character vectors into a single character
#' vector. It's very similar to [`paste0()`] but uses tidyverse recycling and
#' `NA` rules.
#'
#' `str_c()` is no longer needed; please use `paste0()` instead.
#' One way to understand how `str_c()` works is to picture a 2d matrix of strings,
#' where each argument forms a column. `sep` is inserted between each column,
#' and then each row is combined together into a single string. If `collapse`
#' is set, it's inserted between each row, and then the result is again
#' combined, this time into a single string.
#'
#' @details
#' @param ... One or more character vectors.
#'
#' To understand how `str_c` works, you need to imagine that you are building up
#' a matrix of strings. Each input argument forms a column, and is expanded to
#' the length of the longest argument, using the usual recycling rules. The
#' `sep` string is inserted between each column. If collapse is `NULL` each row
#' is collapsed into a single string. If non-`NULL` that string is inserted at
#' the end of each row, and the entire matrix collapsed to a single string.
#'
#' @param ... One or more character vectors. Zero length arguments
#' are removed. Short arguments are recycled to the length of the
#' longest.
#' `NULL`s are removed; scalar inputs (vectors of length 1) are recycled to
#' the common length of vector inputs.
#'
#' Like most other R functions, missing values are "infectious": whenever
#' a missing value is combined with another string the result will always
#' be missing. Use [str_replace_na()] to convert `NA` to
#' `"NA"`
#' be missing. Use [dplyr::coalesce()] or [str_replace_na()] to convert
#' `NA` to the desired value.
#' @param sep String to insert between input vectors.
#' @param collapse Optional string used to combine input vectors into single
#' string.
#' @param collapse Optional string used to combine output into single
#' string. Generally better to use [str_flatten()] if you need this
#' behaviour.
#' @return If `collapse = NULL` (the default) a character vector with
#' length equal to the longest input string. If `collapse` is
#' non-NULL, a character vector of length 1.
#' @seealso [paste()] for equivalent base R functionality, and
#' [stringi::stri_join()] which this function wraps
#' length equal to the longest input. If `collapse` is a string, a character
#' vector of length 1.
#' @export
#' @keywords internal
#' @examples
Expand All @@ -41,11 +38,30 @@
#' str_c(letters, collapse = "")
#' str_c(letters, collapse = ", ")
#'
#' # Differences from paste() ----------------------
#' # Missing inputs give missing outputs
#' str_c(c("a", NA, "b"), "-d")
#' paste0(c("a", NA, "b"), "-d")
#' # Use str_replace_na() to display literal NAs:
#' str_c(str_replace_na(c("a", NA, "b")), "-d")
#' @import stringi
#'
#' # Uses tidyverse recycling rules
#' \dontrun{str_c(1:2, 1:3)} # errors
#' paste0(1:2, 1:3)
#'
#' str_c("x", character())
#' paste0("x", character())
str_c <- function(..., sep = "", collapse = NULL) {
  # Validate the scalar arguments first so that misuse is reported as an
  # error before any joining work is attempted.
  if (!is_string(sep)) {
    abort("`sep` must be a single string")
  }
  if (!is.null(collapse) && !is_string(collapse)) {
    abort("`collapse` must be NULL or single string")
  }

  # Drop `NULL` inputs entirely instead of treating them as length-0 vectors.
  dots <- list(...)
  dots <- dots[!map_lgl(dots, is.null)]

  # Called only for its side effect: signals an error when the remaining
  # inputs cannot be brought to a common size (only length-1 vectors are
  # recycled). The returned size itself is not needed.
  vctrs::vec_size_common(!!!dots)

  # Join the surviving inputs element-wise; `collapse`, when supplied,
  # further combines the result into a single string.
  exec(stri_c, !!!dots, sep = sep, collapse = collapse)
}
4 changes: 4 additions & 0 deletions R/conv.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,9 @@
#' str_conv(x, "ISO-8859-2") # Polish "a with ogonek"
#' str_conv(x, "ISO-8859-1") # Plus-minus
str_conv <- function(string, encoding) {
  # `encoding` names one source encoding, so anything other than a single
  # string is rejected before conversion.
  encoding_ok <- is_string(encoding)
  if (!encoding_ok) {
    abort("`encoding` must be a single string.")
  }

  # Re-encode `string` from `encoding` into UTF-8.
  stri_conv(string, from = encoding, to = "UTF-8")
}
4 changes: 3 additions & 1 deletion R/count.r
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
#' str_count(c("a.", "...", ".a.a"), ".")
#' str_count(c("a.", "...", ".a.a"), fixed("."))
str_count <- function(string, pattern = "") {
check_lengths(string, pattern)

switch(type(pattern),
empty = stri_count_boundaries(string, opts_brkiter = opts(pattern)),
empty = ,
bound = stri_count_boundaries(string, opts_brkiter = opts(pattern)),
fixed = stri_count_fixed(string, pattern, opts_fixed = opts(pattern)),
coll = stri_count_coll(string, pattern, opts_collator = opts(pattern)),
Expand Down
32 changes: 18 additions & 14 deletions R/detect.r
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
#' # Returns TRUE if the pattern does NOT match
#' str_detect(fruit, "^p", negate = TRUE)
str_detect <- function(string, pattern, negate = FALSE) {
check_lengths(string, pattern)

switch(type(pattern),
empty = ,
bound = str_count(string, pattern) > 0 & !negate,
Expand Down Expand Up @@ -75,33 +77,34 @@ str_detect <- function(string, pattern, negate = FALSE) {
#' str_ends(fruit, "e")
#' str_ends(fruit, "e", negate = TRUE)
str_starts <- function(string, pattern, negate = FALSE) {
  # Project helper defined elsewhere; presumably enforces the recycling
  # rules between `string` and `pattern` -- confirm against its definition.
  check_lengths(string, pattern)

  # Dispatch on the pattern engine. "empty" falls through to "bound":
  # neither is meaningful for a starts-with test.
  switch(type(pattern),
    empty = ,
    bound = stop("boundary() patterns are not supported."),
    fixed = stri_startswith_fixed(
      string, pattern,
      negate = negate, opts_fixed = opts(pattern)
    ),
    coll = stri_startswith_coll(
      string, pattern,
      negate = negate, opts_collator = opts(pattern)
    ),
    regex = {
      # Anchor the pattern at the start of the string; the group keeps any
      # top-level alternation in `pattern` inside the anchor. Options are
      # still read from the original `pattern` object.
      pattern2 <- paste0("^(", pattern, ")")
      stri_detect_regex(
        string, pattern2,
        negate = negate, opts_regex = opts(pattern)
      )
    }
  )
}

#' @rdname str_starts
#' @export
str_ends <- function(string, pattern, negate = FALSE) {
  # Project helper defined elsewhere; presumably enforces the recycling
  # rules between `string` and `pattern` -- confirm against its definition.
  check_lengths(string, pattern)

  # Dispatch on the pattern engine. "empty" falls through to "bound":
  # neither is meaningful for an ends-with test.
  switch(type(pattern),
    empty = ,
    bound = stop("boundary() patterns are not supported."),
    fixed = stri_endswith_fixed(
      string, pattern,
      negate = negate, opts_fixed = opts(pattern)
    ),
    coll = stri_endswith_coll(
      string, pattern,
      negate = negate, opts_collator = opts(pattern)
    ),
    regex = {
      # Anchor the pattern at the end of the string; the group keeps any
      # top-level alternation in `pattern` inside the anchor. Options are
      # still read from the original `pattern` object.
      pattern2 <- paste0("(", pattern, ")$")
      stri_detect_regex(
        string, pattern2,
        negate = negate, opts_regex = opts(pattern)
      )
    }
  )
}

Expand Down Expand Up @@ -135,6 +138,7 @@ str_like <- function(string, pattern, ignore_case = TRUE) {
}

pattern <- regex(like_to_regex(pattern), ignore_case = ignore_case)
check_lengths(string, pattern)
stri_detect_regex(string, pattern, opts_regex = opts(pattern))
}

Expand Down
1 change: 1 addition & 0 deletions R/dup.r
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@
#' str_dup(fruit, 1:3)
#' str_c("ba", str_dup("na", 0:5))
str_dup <- function(string, times) {
  # Called only for its side effect: signals an error when `string` and
  # `times` cannot be brought to a common size. The result is discarded.
  vctrs::vec_size_common(string = string, times = times)

  # Repeat each element of `string` the corresponding number of `times`.
  stri_dup(str = string, times = times)
}
4 changes: 4 additions & 0 deletions R/extract.r
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#' # Extract all words
#' str_extract_all("This is, suprisingly, a sentence.", boundary("word"))
str_extract <- function(string, pattern) {
check_lengths(string, pattern)

switch(type(pattern),
empty = stri_extract_first_boundaries(string, pattern, opts_brkiter = opts(pattern)),
bound = stri_extract_first_boundaries(string, pattern, opts_brkiter = opts(pattern)),
Expand All @@ -40,6 +42,8 @@ str_extract <- function(string, pattern) {
#' @rdname str_extract
#' @export
str_extract_all <- function(string, pattern, simplify = FALSE) {
check_lengths(string, pattern)

switch(type(pattern),
empty = stri_extract_all_boundaries(string, pattern,
simplify = simplify, omit_no_match = TRUE, opts_brkiter = opts(pattern)),
Expand Down
4 changes: 4 additions & 0 deletions R/flatten.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#' str_flatten(letters[1:2], ", ", ", and ")
#' str_flatten(letters[1], ", ", ", and ")
str_flatten <- function(string, collapse = "", last = NULL) {
if (!is_string(collapse)) {
abort("`collapse` must be a single string.")
}

n <- length(string)
if (!is.null(last) && n >= 2) {
string <- c(
Expand Down
7 changes: 5 additions & 2 deletions R/locate.r
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@
#' # Find location of every character
#' str_locate_all(fruit, "")
str_locate <- function(string, pattern) {
check_lengths(string, pattern)

switch(type(pattern),
empty = stri_locate_first_boundaries(string, opts_brkiter = opts(pattern)),
empty = ,
bound = stri_locate_first_boundaries(string, opts_brkiter = opts(pattern)),
fixed = stri_locate_first_fixed(string, pattern, opts_fixed = opts(pattern)),
coll = stri_locate_first_coll(string, pattern, opts_collator = opts(pattern)),
Expand All @@ -38,10 +40,11 @@ str_locate <- function(string, pattern) {
#' @rdname str_locate
#' @export
str_locate_all <- function(string, pattern) {
check_lengths(string, pattern)
opts <- opts(pattern)

switch(type(pattern),
empty = stri_locate_all_boundaries(string, omit_no_match = TRUE, opts_brkiter = opts),
empty = ,
bound = stri_locate_all_boundaries(string, omit_no_match = TRUE, opts_brkiter = opts),
fixed = stri_locate_all_fixed(string, pattern, omit_no_match = TRUE, opts_fixed = opts),
regex = stri_locate_all_regex(string, pattern, omit_no_match = TRUE, opts_regex = opts),
Expand Down
2 changes: 2 additions & 0 deletions R/match.r
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ str_match <- function(string, pattern) {
stop("Can only match regular expressions", call. = FALSE)
}

check_lengths(string, pattern)
stri_match_first_regex(string,
pattern,
opts_regex = opts(pattern)
Expand All @@ -51,6 +52,7 @@ str_match_all <- function(string, pattern) {
stop("Can only match regular expressions", call. = FALSE)
}

check_lengths(string, pattern)
stri_match_all_regex(string,
pattern,
omit_no_match = TRUE,
Expand Down
2 changes: 1 addition & 1 deletion R/modifiers.r
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ regex <- function(pattern, ignore_case = FALSE, multiline = FALSE,
#' @rdname modifiers
boundary <- function(type = c("character", "line_break", "sentence", "word"),
skip_word_none = NA, ...) {
type <- match.arg(type)
type <- arg_match(type)

if (identical(skip_word_none, NA)) {
skip_word_none <- type == "word"
Expand Down
3 changes: 2 additions & 1 deletion R/pad.r
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
#' # Longer strings are returned unchanged
#' str_pad("hadley", 3)
str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ", use_length = FALSE) {
side <- match.arg(side)
vctrs::vec_size_common(string = string, width = width, pad = pad)
side <- arg_match(side)

switch(side,
left = stri_pad_left(string, width, pad = pad, use_length = use_length),
Expand Down
4 changes: 4 additions & 0 deletions R/replace.r
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ str_replace <- function(string, pattern, replacement) {
return(str_transform(string, pattern, replacement))
}

check_lengths(string, pattern, replacement)

switch(type(pattern),
empty = stop("Empty `pattern` not supported", call. = FALSE),
bound = stop("Boundary `pattern` not supported", call. = FALSE),
Expand Down Expand Up @@ -98,6 +100,8 @@ str_replace_all <- function(string, pattern, replacement) {
vec <- TRUE
}

check_lengths(string, pattern, replacement)

switch(type(pattern),
empty = stop("Empty `pattern`` not supported", call. = FALSE),
bound = stop("Boundary `pattern` not supported", call. = FALSE),
Expand Down
1 change: 1 addition & 0 deletions R/split.r
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#' str_split_n(fruits, " and ", 3)
str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
if (identical(n, Inf)) n <- -1L
check_lengths(string, pattern)

switch(type(pattern),
empty = stri_split_boundaries(string, n = n, simplify = simplify, opts_brkiter = opts(pattern)),
Expand Down
1 change: 1 addition & 0 deletions R/stringr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"_PACKAGE"

## usethis namespace: start
#' @import stringi
#' @import rlang
#' @importFrom glue glue
#' @importFrom lifecycle deprecated
Expand Down
4 changes: 4 additions & 0 deletions R/sub.r
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@
#' str_sub(x4, 1, 2, omit_na = TRUE) <- NA
#' x1; x2; x3; x4
str_sub <- function(string, start = 1L, end = -1L) {
vctrs::vec_size_common(string = string, start = start, end = end)

if (is.matrix(start)) {
stri_sub(string, from = start)
} else {
Expand All @@ -73,6 +75,8 @@ str_sub <- function(string, start = 1L, end = -1L) {
#' @export
#' @rdname str_sub
"str_sub<-" <- function(string, start = 1L, end = -1L, omit_na = FALSE, value) {
vctrs::vec_size_common(string = string, start = start, end = end)

if (is.matrix(start)) {
stri_sub(string, from = start, omit_na = omit_na) <- value
} else {
Expand Down
2 changes: 2 additions & 0 deletions R/subset.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#' str_subset(c("a", NA, "b"), ".")
#' str_which(c("a", NA, "b"), ".")
str_subset <- function(string, pattern, negate = FALSE) {
check_lengths(string, pattern)

switch(type(pattern),
empty = ,
bound = string[str_detect(string, pattern) & !negate],
Expand Down
2 changes: 1 addition & 1 deletion R/trim.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#' str_squish(" String with trailing, middle, and leading white space\t")
#' str_squish("\n\nString with excess, trailing and leading white space\n\n")
str_trim <- function(string, side = c("both", "left", "right")) {
side <- match.arg(side)
side <- arg_match(side)

switch(side,
left = stri_trim_left(string),
Expand Down
5 changes: 4 additions & 1 deletion R/trunc.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
#'
str_trunc <- function(string, width, side = c("right", "left", "center"),
ellipsis = "...") {
side <- match.arg(side)
side <- arg_match(side)
if (!is.numeric(width) || length(width) != 1) {
abort("`width` must be a single number")
}

too_long <- !is.na(string) & str_length(string) > width
width... <- width - str_length(ellipsis)
Expand Down
Loading