From 3a06b1e8ba5e90dfa54ad75ce92873ca142e7027 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 1 Apr 2020 16:04:14 -0400 Subject: [PATCH 1/3] Add tests for HTML parsing --- src/utils/html.rs | 35 ++++++++++++++++++ tests/regex/body.html | 86 +++++++++++++++++++++++++++++++++++++++++++ tests/regex/head.html | 1 + 3 files changed, 122 insertions(+) create mode 100644 tests/regex/body.html create mode 100644 tests/regex/head.html diff --git a/src/utils/html.rs b/src/utils/html.rs index a7fe2b41a..932dbe494 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -72,3 +72,38 @@ fn extract_class(node: &Handle) -> String { _ => String::new() } } + +#[cfg(test)] +mod test { + use super::*; + + fn round_trip(a: &str) -> String { + let parser = parse_document(RcDom::default(), ParseOpts::default()); + stringify(parser.one(a).document) + } + fn mostly_equal(a: &str, b: &str) -> bool { + round_trip(a.trim()).trim() == round_trip(b.trim()).trim() + } + + #[test] + fn small_html() { + let (head, body, class) = super::extract_head_and_body( + r#"

hello

"# + ).unwrap(); + assert_eq!(head, r#""#); + assert_eq!(body, "

hello

"); + assert_eq!(class, "rustdoc struct"); + } + + // more of an integration test + #[test] + fn parse_regex_html() { + let original = std::fs::read_to_string("benches/struct.CaptureMatches.html").unwrap(); + let expected_head = std::fs::read_to_string("tests/regex/head.html").unwrap(); + let expected_body = std::fs::read_to_string("tests/regex/body.html").unwrap(); + let (head, body, class) = super::extract_head_and_body(&original).unwrap(); + assert!(mostly_equal(&head, &expected_head)); + assert!(mostly_equal(&body, &expected_body)); + assert_eq!(class, "rustdoc struct"); + } +} diff --git a/tests/regex/body.html b/tests/regex/body.html new file mode 100644 index 000000000..fdf8dca28 --- /dev/null +++ b/tests/regex/body.html @@ -0,0 +1,86 @@ +

[][src]Struct regex::CaptureMatches

pub struct CaptureMatches<'r, 't>(_);

An iterator that yields all non-overlapping capture groups matching a +particular regular expression.

+

The iterator stops when no more matches can be found.

+

'r is the lifetime of the compiled regular expression and 't is the +lifetime of the matched string.

+

Trait Implementations

impl<'r, 't> Iterator for CaptureMatches<'r, 't>[src]

type Item = Captures<'t>

The type of the elements being iterated over.

+

Auto Trait Implementations

impl<'r, 't> !Sync for CaptureMatches<'r, 't>

impl<'r, 't> !Send for CaptureMatches<'r, 't>

impl<'r, 't> Unpin for CaptureMatches<'r, 't>

impl<'r, 't> !UnwindSafe for CaptureMatches<'r, 't>

impl<'r, 't> !RefUnwindSafe for CaptureMatches<'r, 't>

Blanket Implementations

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<I> IntoIterator for I where
    I: Iterator
[src]

type Item = <I as Iterator>::Item

The type of the elements being iterated over.

+

type IntoIter = I

Which kind of iterator are we turning this into?

+

impl<T> From<T> for T[src]

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

+

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

+

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> Any for T where
    T: 'static + ?Sized
[src]

diff --git a/tests/regex/head.html b/tests/regex/head.html new file mode 100644 index 000000000..2a7cea013 --- /dev/null +++ b/tests/regex/head.html @@ -0,0 +1 @@ +regex::CaptureMatches - Rust From d193e10f650583dfd83ef054c099615d9bd10310 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 1 Apr 2020 16:36:16 -0400 Subject: [PATCH 2/3] Fix failing test --- src/utils/html.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/html.rs b/src/utils/html.rs index 932dbe494..d439390d2 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -90,8 +90,8 @@ mod test { let (head, body, class) = super::extract_head_and_body( r#"

hello

"# ).unwrap(); - assert_eq!(head, r#""#); - assert_eq!(body, "

hello

"); + assert_eq!(head, r#""#); + assert_eq!(body, "

hello

"); assert_eq!(class, "rustdoc struct"); } From b3b90f156f125d0a3798227ca81c8b81256361ca Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 1 Apr 2020 17:13:52 -0400 Subject: [PATCH 3/3] Use exact comparisons instead of round-tripping through the parser This makes the tests independent of the parser we use, although we may need to update our known-good tests whenever we switch parsers. --- src/utils/html.rs | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/utils/html.rs b/src/utils/html.rs index d439390d2..b145a356d 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -75,16 +75,6 @@ fn extract_class(node: &Handle) -> String { #[cfg(test)] mod test { - use super::*; - - fn round_trip(a: &str) -> String { - let parser = parse_document(RcDom::default(), ParseOpts::default()); - stringify(parser.one(a).document) - } - fn mostly_equal(a: &str, b: &str) -> bool { - round_trip(a.trim()).trim() == round_trip(b.trim()).trim() - } - #[test] fn small_html() { let (head, body, class) = super::extract_head_and_body( @@ -102,8 +92,8 @@ mod test { let expected_head = std::fs::read_to_string("tests/regex/head.html").unwrap(); let expected_body = std::fs::read_to_string("tests/regex/body.html").unwrap(); let (head, body, class) = super::extract_head_and_body(&original).unwrap(); - assert!(mostly_equal(&head, &expected_head)); - assert!(mostly_equal(&body, &expected_body)); + assert_eq!(head, expected_head.trim()); + assert_eq!(&body, &expected_body.trim()); assert_eq!(class, "rustdoc struct"); } }