From face911aef0d6399a20c6adf3fa92a085e20abe5 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Mon, 1 Oct 2018 22:17:31 +0900 Subject: [PATCH 1/3] Add failing tests --- tests/testthat/test-detect.r | 3 +++ tests/testthat/test-subset.r | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/tests/testthat/test-detect.r b/tests/testthat/test-detect.r index 8a5e53d1..589dd83e 100644 --- a/tests/testthat/test-detect.r +++ b/tests/testthat/test-detect.r @@ -8,6 +8,9 @@ test_that("special cases are correct", { test_that("vectorised patterns work", { expect_equal(str_detect("ab", c("a", "b", "c")), c(T, T, F)) expect_equal(str_detect(c("ca", "ab"), c("a", "c")), c(T, F)) + + # negation works + expect_equal(str_detect("ab", c("a", "b", "c"), negate = TRUE), c(F, F, T)) }) test_that("modifiers work", { diff --git a/tests/testthat/test-subset.r b/tests/testthat/test-subset.r index a3a569dc..187fda9e 100644 --- a/tests/testthat/test-subset.r +++ b/tests/testthat/test-subset.r @@ -6,6 +6,9 @@ test_that("basic subsetting for fixed patterns works", { str_subset(c("i", "I"), fixed("i", ignore_case = TRUE)), c("i", "I") ) + + # negation works + expect_equal(str_subset(c("i", "I"), fixed("i"), negate = TRUE), "I") }) test_that("str_which is equivalent to grep", { @@ -13,4 +16,10 @@ test_that("str_which is equivalent to grep", { str_which(head(letters), "[aeiou]"), grep("[aeiou]", head(letters)) ) + + # negation works + expect_equal( + str_which(head(letters), "[aeiou]", negate = TRUE), + grep("[aeiou]", head(letters), invert = TRUE) + ) }) From 78786729b2a7e8007feb1c117fec122cb4713891 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Mon, 1 Oct 2018 22:34:59 +0900 Subject: [PATCH 2/3] Add negate argument to str_subset(), str_detect(), and str_which() --- R/detect.r | 15 ++++++++++----- R/subset.R | 17 ++++++++++------- man/str_detect.Rd | 7 ++++++- man/str_subset.Rd | 9 +++++++-- 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/R/detect.r b/R/detect.r index 
56a570da..18400f09 100644 --- a/R/detect.r +++ b/R/detect.r @@ -20,6 +20,8 @@ #' Match character, word, line and sentence boundaries with #' [boundary()]. An empty pattern, "", is equivalent to #' `boundary("character")`. +#' +#' @param negate If `TRUE`, return non-matching elements. #' @return A logical vector. #' @seealso [stringi::stri_detect()] which this function wraps, #' [str_subset()] for a convenient wrapper around @@ -35,12 +37,15 @@ #' #' # Also vectorised over pattern #' str_detect("aecfg", letters) -str_detect <- function(string, pattern) { +#' +#' # Returns TRUE if the pattern does NOT match +#' str_detect(fruit, "^p", negate = TRUE) +str_detect <- function(string, pattern, negate = FALSE) { switch(type(pattern), empty = , - bound = str_count(string, pattern) > 0, - fixed = stri_detect_fixed(string, pattern, opts_fixed = opts(pattern)), - coll = stri_detect_coll(string, pattern, opts_collator = opts(pattern)), - regex = stri_detect_regex(string, pattern, opts_regex = opts(pattern)) + bound = xor(str_count(string, pattern) > 0, negate), + fixed = stri_detect_fixed(string, pattern, negate = negate, opts_fixed = opts(pattern)), + coll = stri_detect_coll(string, pattern, negate = negate, opts_collator = opts(pattern)), + regex = stri_detect_regex(string, pattern, negate = negate, opts_regex = opts(pattern)) ) } diff --git a/R/subset.R b/R/subset.R index fc72432e..c1c9182a 100644 --- a/R/subset.R +++ b/R/subset.R @@ -23,21 +23,24 @@ #' str_subset(fruit, "b") #' str_subset(fruit, "[aeiou]") #' +#' # Returns elements that do NOT match +#' str_subset(fruit, "^p", negate = TRUE) +#' #' # Missings never match #' str_subset(c("a", NA, "b"), ".") #' str_which(c("a", NA, "b"), ".") -str_subset <- function(string, pattern) { +str_subset <- function(string, pattern, negate = FALSE) { switch(type(pattern), empty = , - bound = string[str_detect(string, pattern)], - fixed = stri_subset_fixed(string, pattern, omit_na = TRUE, opts_fixed = opts(pattern)), - coll = 
stri_subset_coll(string, pattern, omit_na = TRUE, opts_collator = opts(pattern)), - regex = stri_subset_regex(string, pattern, omit_na = TRUE, opts_regex = opts(pattern)) + bound = string[str_detect(string, pattern, negate = negate)], + fixed = stri_subset_fixed(string, pattern, omit_na = TRUE, negate = negate, opts_fixed = opts(pattern)), + coll = stri_subset_coll(string, pattern, omit_na = TRUE, negate = negate, opts_collator = opts(pattern)), + regex = stri_subset_regex(string, pattern, omit_na = TRUE, negate = negate, opts_regex = opts(pattern)) ) } #' @export #' @rdname str_subset -str_which <- function(string, pattern) { - which(str_detect(string, pattern)) +str_which <- function(string, pattern, negate = FALSE) { + which(str_detect(string, pattern, negate = negate)) } diff --git a/man/str_detect.Rd b/man/str_detect.Rd index 51563b01..bc0ec558 100644 --- a/man/str_detect.Rd +++ b/man/str_detect.Rd @@ -4,7 +4,7 @@ \alias{str_detect} \title{Detect the presence or absence of a pattern in a string.} \usage{ -str_detect(string, pattern) +str_detect(string, pattern, negate = FALSE) } \arguments{ \item{string}{Input vector. Either a character vector, or something @@ -24,6 +24,8 @@ respects character matching rules for the specified locale. Match character, word, line and sentence boundaries with \code{\link[=boundary]{boundary()}}. An empty pattern, "", is equivalent to \code{boundary("character")}.} + +\item{negate}{If \code{TRUE}, return non-matching elements.} } \value{ A logical vector. 
@@ -43,6 +45,9 @@ str_detect(fruit, "[aeiou]") # Also vectorised over pattern str_detect("aecfg", letters) + +# Returns TRUE if the pattern does NOT match +str_detect(fruit, "^p", negate = TRUE) } \seealso{ \code{\link[stringi:stri_detect]{stringi::stri_detect()}} which this function wraps, diff --git a/man/str_subset.Rd b/man/str_subset.Rd index 41cc7d0a..baab722d 100644 --- a/man/str_subset.Rd +++ b/man/str_subset.Rd @@ -5,9 +5,9 @@ \alias{str_which} \title{Keep strings matching a pattern, or find positions.} \usage{ -str_subset(string, pattern) +str_subset(string, pattern, negate = FALSE) -str_which(string, pattern) +str_which(string, pattern, negate = FALSE) } \arguments{ \item{string}{Input vector. Either a character vector, or something @@ -27,6 +27,8 @@ respects character matching rules for the specified locale. Match character, word, line and sentence boundaries with \code{\link[=boundary]{boundary()}}. An empty pattern, "", is equivalent to \code{boundary("character")}.} + +\item{negate}{If \code{TRUE}, return non-matching elements.} } \value{ A character vector. @@ -51,6 +53,9 @@ str_subset(fruit, "a$") str_subset(fruit, "b") str_subset(fruit, "[aeiou]") +# Returns elements that do NOT match +str_subset(fruit, "^p", negate = TRUE) + # Missings never match str_subset(c("a", NA, "b"), ".") str_which(c("a", NA, "b"), ".") From 73de3cb53ab7d42cc0f27035a706b5ea4a6383a2 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Mon, 1 Oct 2018 22:47:01 +0900 Subject: [PATCH 3/3] Add a NEWS --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 1cf6e029..c49c59b6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,10 @@ * `str_interp()` now renders lists consistently independent on the presence of additional placeholders (@amhrasmussen) +* `str_subset()`, `str_detect()`, and `str_which()` get a `negate` argument, + which is useful when you want the elements that do NOT match (#259, + @yutannihilation). 
+ # stringr 1.3.1 * `str_replace_all()` with a named vector now respects modifier functions (#207)