Skip to content

Commit c7a94d0

Browse files
committed
Merge pull request #101 from lmullen/fix-sentence-splitting
Fix sentence splitting
2 parents 15a586a + cbb27e2 commit c7a94d0

File tree

4 files changed

+23
-4
lines changed

4 files changed

+23
-4
lines changed

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313

1414
* `str_replace()` and `str_replace_all()` now behave correctly when a
1515
replacement string contains `$`s, `\\\\1`, etc. (#83, @gagolews).
16+
17+
* `boundary()` now defaults to `skip_word_none = NA`, so punctuation is only
18+
skipped for `type = "word"` and sentence splitting works (#58, @lmullen).
1619

1720
# stringr 1.0.0
1821

R/modifiers.r

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,18 @@ regex <- function(pattern, ignore_case = FALSE, multiline = FALSE,
115115

116116
#' @param type Boundary type to detect.
117117
#' @param skip_word_none Ignore "words" that don't contain any letters
118-
#' or numbers - i.e. punctuation.
118+
#' or numbers - i.e. punctuation. Default \code{NA} will skip such "words"
119+
#' only when splitting on \code{word} boundaries.
119120
#' @export
120121
#' @rdname modifiers
121122
boundary <- function(type = c("character", "line_break", "sentence", "word"),
122-
skip_word_none = TRUE, ...) {
123+
skip_word_none = NA, ...) {
123124
type <- match.arg(type)
125+
126+
if (identical(skip_word_none, NA)) {
127+
skip_word_none <- type == "word"
128+
}
129+
124130
options <- stri_opts_brkiter(
125131
type = type,
126132
skip_word_none = skip_word_none,

man/modifiers.Rd

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-split.r

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,12 @@ test_that("n sets exact number of splits in str_split_fixed", {
6767
equals(c("Subject", "Roger: his drinking problems")))
6868

6969
})
70+
71+
test_that("str_split can split sentences correctly", {
72+
test <- "This is a sentence. Is this a sentence? Why, yes it is."
73+
expect_that(length(str_split(test, boundary("sentence"))[[1]]),
74+
equals(3))
75+
expect_that(str_split(test, boundary("sentence")),
76+
equals(list(c("This is a sentence. ", "Is this a sentence? ",
77+
"Why, yes it is."))))
78+
})

0 commit comments

Comments
 (0)