From e2e47d6eb5425cd860137d632c1b8688e5e74241 Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Fri, 24 Oct 2014 14:28:10 +0100 Subject: [PATCH 1/3] regex: Fix control flow in the parser --- src/libregex/parse.rs | 32 ++++++++++++++------------------ src/libregex/test/tests.rs | 10 ++++++++++ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/libregex/parse.rs b/src/libregex/parse.rs index 1d1d1a0e9c5ca..2b6aa669f3c21 100644 --- a/src/libregex/parse.rs +++ b/src/libregex/parse.rs @@ -411,9 +411,6 @@ impl<'a> Parser<'a> { ast => fail!("Unexpected AST item '{}'", ast), } } - _ => {}, - } - match c { ']' => { if ranges.len() > 0 { let flags = negated | (self.flags & FLAG_NOCASE); @@ -431,22 +428,21 @@ impl<'a> Parser<'a> { } return Ok(()) } - c => { - if self.peek_is(1, '-') && !self.peek_is(2, ']') { - try!(self.expect('-')) - try!(self.noteof("not a ']'")) - let c2 = self.cur(); - if c2 < c { - return self.err(format!("Invalid character class \ - range '{}-{}'", - c, - c2).as_slice()) - } - ranges.push((c, self.cur())) - } else { - ranges.push((c, c)) - } + } + + if self.peek_is(1, '-') && !self.peek_is(2, ']') { + try!(self.expect('-')) + try!(self.noteof("not a ']'")) + let c2 = self.cur(); + if c2 < c { + return self.err(format!("Invalid character class \ + range '{}-{}'", + c, + c2).as_slice()) } + ranges.push((c, self.cur())) + } else { + ranges.push((c, c)) } } } diff --git a/src/libregex/test/tests.rs b/src/libregex/test/tests.rs index 088425c088855..fa645c84dd86d 100644 --- a/src/libregex/test/tests.rs +++ b/src/libregex/test/tests.rs @@ -43,6 +43,16 @@ fn empty_regex_nonempty_match() { assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]); } +#[test] +fn quoted_bracket_set() { + let re = regex!(r"([\x{5b}\x{5d}])"); + let ms = re.find_iter("[]").collect::>(); + assert_eq!(ms, vec![(0, 1), (1, 2)]); + let re = regex!(r"([\[\]])"); + let ms = re.find_iter("[]").collect::>(); + assert_eq!(ms, vec![(0, 1), (1, 2)]); +} + macro_rules! replace( ($name:ident, $which:ident, $re:expr, $search:expr, $replace:expr, $result:expr) => ( From 48daba088b0f353cc4fdaba144da7a2c1b1de87f Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Fri, 24 Oct 2014 16:24:29 +0100 Subject: [PATCH 2/3] regex: Escaped literals can end ranges --- src/libregex/parse.rs | 23 +++++++++++++++++------ src/libregex/test/tests.rs | 11 +++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/libregex/parse.rs b/src/libregex/parse.rs index 2b6aa669f3c21..b7313ff6c1a1f 100644 --- a/src/libregex/parse.rs +++ b/src/libregex/parse.rs @@ -375,15 +375,15 @@ impl<'a> Parser<'a> { let mut alts: Vec = vec!(); if self.peek_is(1, ']') { - try!(self.expect(']')) + try!(self.expect(']')); ranges.push((']', ']')) } while self.peek_is(1, '-') { - try!(self.expect('-')) + try!(self.expect('-')); ranges.push(('-', '-')) } loop { - try!(self.noteof("a closing ']' or a non-empty character class)")) + try!(self.noteof("a closing ']' or a non-empty character class)")); let mut c = self.cur(); match c { '[' => @@ -428,12 +428,23 @@ impl<'a> Parser<'a> { } return Ok(()) } + _ => {} } if self.peek_is(1, '-') && !self.peek_is(2, ']') { - try!(self.expect('-')) - try!(self.noteof("not a ']'")) - let c2 = self.cur(); + try!(self.expect('-')); + // The regex can't end here. + try!(self.noteof("not a ']'")); + // End the range with a single character or character escape. + let mut c2 = self.cur(); + if c2 == '\\' { + match try!(self.parse_escape()) { + Literal(c3, _) => c2 = c3, // allow literal escapes below + ast => + return self.err(format!("Expected a literal, but got {}.", + ast).as_slice()), + } + } if c2 < c { return self.err(format!("Invalid character class \ range '{}-{}'", diff --git a/src/libregex/test/tests.rs b/src/libregex/test/tests.rs index fa645c84dd86d..4f4137265c031 100644 --- a/src/libregex/test/tests.rs +++ b/src/libregex/test/tests.rs @@ -53,6 +53,13 @@ fn quoted_bracket_set() { assert_eq!(ms, vec![(0, 1), (1, 2)]); } +#[test] +fn range_ends_with_escape() { + let re = regex!(r"([\[-\x{5d}])"); + let ms = re.find_iter("[]").collect::>(); + assert_eq!(ms, vec![(0, 1), (1, 2)]); +} + macro_rules! replace( ($name:ident, $which:ident, $re:expr, $search:expr, $replace:expr, $result:expr) => ( @@ -124,6 +131,10 @@ noparse!(fail_double_neg, "(?-i-i)") noparse!(fail_neg_empty, "(?i-)") noparse!(fail_empty_group, "()") noparse!(fail_dupe_named, "(?P.)(?P.)") +noparse!(fail_range_end_no_class, "[a-[:lower:]]") +noparse!(fail_range_end_no_begin, r"[a-\A]") +noparse!(fail_range_end_no_end, r"[a-\z]") +noparse!(fail_range_end_no_boundary, r"[a-\b]") macro_rules! mat( ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => ( From f21977318112a1b7491bbf462dfefbd8ec14743e Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Fri, 24 Oct 2014 16:38:23 +0100 Subject: [PATCH 3/3] regex: The first range in a character class can start with a left bracket --- src/libregex/parse.rs | 6 +----- src/libregex/test/tests.rs | 7 +++++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/libregex/parse.rs b/src/libregex/parse.rs index b7313ff6c1a1f..35583be372cb3 100644 --- a/src/libregex/parse.rs +++ b/src/libregex/parse.rs @@ -374,10 +374,6 @@ impl<'a> Parser<'a> { let mut ranges: Vec<(char, char)> = vec!(); let mut alts: Vec = vec!(); - if self.peek_is(1, ']') { - try!(self.expect(']')); - ranges.push((']', ']')) - } while self.peek_is(1, '-') { try!(self.expect('-')); ranges.push(('-', '-')) @@ -411,7 +407,7 @@ impl<'a> Parser<'a> { ast => fail!("Unexpected AST item '{}'", ast), } } - ']' => { + ']' if ranges.len() > 0 || alts.len() > 0 => { if ranges.len() > 0 { let flags = negated | (self.flags & FLAG_NOCASE); let mut ast = AstClass(combine_ranges(ranges), flags); diff --git a/src/libregex/test/tests.rs b/src/libregex/test/tests.rs index 4f4137265c031..06f7db274189d 100644 --- a/src/libregex/test/tests.rs +++ b/src/libregex/test/tests.rs @@ -53,6 +53,13 @@ fn quoted_bracket_set() { assert_eq!(ms, vec![(0, 1), (1, 2)]); } +#[test] +fn first_range_starts_with_left_bracket() { + let re = regex!(r"([[-z])"); + let ms = re.find_iter("[]").collect::>(); + assert_eq!(ms, vec![(0, 1), (1, 2)]); +} + #[test] fn range_ends_with_escape() { let re = regex!(r"([\[-\x{5d}])");