Skip to content

Commit d6b3beb

Browse files
author
bors-servo
authored
Auto merge of #360 - Ygg01:master, r=jdm
Add criterion.rs to html5ever/xml5ever Fixes #45 and is a stepping stone for #286. Why is this important for #286? ----- It allows the project to track its performance, since [criterion.rs](https://github.com/bheisler/criterion.rs) generates benchmark data and tracks changes to performance. Possible enhancements ----- This is a first pass at criterion. It's possible the performance could be better tracked with more generic benchmarks, possibly publishing this data to see how it behaves overall. More tests for xml5ever would also help, possibly tests that stress namespace rules and the parts where XML is weak.
2 parents 95c374e + b8e9bed commit d6b3beb

File tree

13 files changed

+353
-308
lines changed

13 files changed

+353
-308
lines changed

html5ever/Cargo.toml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "html5ever"
4-
version = "0.22.5"
4+
version = "0.22.6"
55
authors = [ "The html5ever Project Developers" ]
66
license = "MIT / Apache-2.0"
77
repository = "https://github.com/servo/html5ever"
@@ -26,10 +26,6 @@ harness = false
2626
[[test]]
2727
name = "serializer"
2828

29-
[[bench]]
30-
name = "tokenizer"
31-
harness = false
32-
3329
[dependencies]
3430
log = "0.4"
3531
mac = "0.1"
@@ -39,8 +35,13 @@ markup5ever = { version = "0.7", path = "../markup5ever" }
3935
rustc-serialize = "0.3.15"
4036
rustc-test = "0.3"
4137
typed-arena = "1.3.0"
38+
criterion = "0.2"
4239

4340
[build-dependencies]
4441
quote = "0.6"
4542
syn = { version = "0.15", features = ["extra-traits", "full", "fold"] }
4643
proc-macro2 = "0.4"
44+
45+
[[bench]]
46+
name = "html5ever"
47+
harness = false

html5ever/benches/html5ever.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#[macro_use]
2+
extern crate criterion;
3+
extern crate html5ever;
4+
5+
use std::fs;
6+
use std::path::PathBuf;
7+
8+
use criterion::{Criterion, black_box};
9+
10+
use html5ever::tokenizer::{BufferQueue, TokenSink, Token, Tokenizer, TokenizerOpts, TokenSinkResult};
11+
use html5ever::tendril::*;
12+
13+
struct Sink;
14+
15+
impl TokenSink for Sink {
16+
type Handle = ();
17+
18+
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
19+
// Don't use the token, but make sure we don't get
20+
// optimized out entirely.
21+
black_box(token);
22+
TokenSinkResult::Continue
23+
}
24+
}
25+
26+
27+
fn run_bench(c: &mut Criterion, name: &str) {
28+
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
29+
path.push("data/bench/");
30+
path.push(name);
31+
let mut file = fs::File::open(&path).ok().expect("can't open file");
32+
33+
// Read the file and treat it as an infinitely repeating sequence of characters.
34+
let mut file_input = ByteTendril::new();
35+
file.read_to_tendril(&mut file_input).ok().expect("can't read file");
36+
let file_input: StrTendril = file_input.try_reinterpret().unwrap();
37+
let size = file_input.len();
38+
let mut stream = file_input.chars().cycle();
39+
40+
// Break the input into chunks of 1024 chars (= a few kB).
41+
// This simulates reading from the network.
42+
let mut input = vec![];
43+
let mut total = 0usize;
44+
while total < size {
45+
// The by_ref() call is important, otherwise we get wrong results!
46+
// See rust-lang/rust#18045.
47+
let sz = std::cmp::min(1024, size - total);
48+
input.push(stream.by_ref().take(sz).collect::<String>().to_tendril());
49+
total += sz;
50+
}
51+
52+
let test_name = format!("html tokenizing {}", name);
53+
54+
c.bench_function(&test_name, move |b| b.iter(|| {
55+
let mut tok = Tokenizer::new(Sink, Default::default());
56+
let mut buffer = BufferQueue::new();
57+
// We are doing clone inside the bench function, this is not ideal, but possibly
58+
// necessary since our iterator consumes the underlying buffer.
59+
for buf in input.clone().into_iter() {
60+
buffer.push_back(buf);
61+
let _ = tok.feed(&mut buffer);
62+
}
63+
let _ = tok.feed(&mut buffer);
64+
tok.end();
65+
}));
66+
}
67+
68+
69+
70+
fn html5ever_benchmark(c: &mut Criterion) {
71+
run_bench(c, "lipsum.html");
72+
run_bench(c, "lipsum-zh.html");
73+
run_bench(c, "medium-fragment.html");
74+
run_bench(c, "small-fragment.html");
75+
run_bench(c, "tiny-fragment.html");
76+
run_bench(c, "strong.html");
77+
}
78+
79+
criterion_group!(benches, html5ever_benchmark);
80+
criterion_main!(benches);

html5ever/benches/tokenizer.rs

Lines changed: 0 additions & 157 deletions
This file was deleted.

scripts/travis-build.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ set -ex
1212

1313
if [ $TRAVIS_RUST_VERSION = nightly ]
1414
then
15-
cargo test -p html5ever --features "rustc-test/capture" --bench tokenizer
15+
cargo bench --all
1616
cargo test -p html5ever --features "rustc-test/capture"
1717
cargo test -p xml5ever --features "rustc-test/capture"
1818
else
19-
cargo test -p html5ever --bench tokenizer
19+
cargo bench --all
2020
cargo test --all
2121
fi
2222

xml5ever/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "xml5ever"
4-
version = "0.12.1"
4+
version = "0.13.0"
55
authors = ["The xml5ever project developers"]
66
license = "MIT / Apache-2.0"
77
repository = "https://github.com/servo/html5ever"
@@ -28,3 +28,8 @@ markup5ever = {version = "0.7", path = "../markup5ever" }
2828
[dev-dependencies]
2929
rustc-serialize = "0.3.15"
3030
rustc-test = "0.3"
31+
criterion = "0.2"
32+
33+
[[bench]]
34+
name = "xml5ever"
35+
harness = false

xml5ever/benches/xml5ever.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#[macro_use]
2+
extern crate criterion;
3+
extern crate xml5ever;
4+
extern crate markup5ever;
5+
6+
use std::fs;
7+
use std::path::PathBuf;
8+
9+
use criterion::{Criterion, black_box};
10+
11+
use markup5ever::buffer_queue::BufferQueue;
12+
use xml5ever::tokenizer::{TokenSink, Token, XmlTokenizer, XmlTokenizerOpts};
13+
use xml5ever::tendril::*;
14+
15+
struct Sink;
16+
17+
impl TokenSink for Sink {
18+
fn process_token(&mut self, token: Token) {
19+
// Don't use the token, but make sure we don't get
20+
// optimized out entirely.
21+
black_box(token);
22+
}
23+
}
24+
25+
26+
fn run_bench(c: &mut Criterion, name: &str) {
27+
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
28+
path.push("data/bench/");
29+
path.push(name);
30+
let mut file = fs::File::open(&path).ok().expect("can't open file");
31+
32+
// Read the file and treat it as an infinitely repeating sequence of characters.
33+
let mut file_input = ByteTendril::new();
34+
file.read_to_tendril(&mut file_input).ok().expect("can't read file");
35+
let file_input: StrTendril = file_input.try_reinterpret().unwrap();
36+
let size = file_input.len();
37+
let mut stream = file_input.chars().cycle();
38+
39+
// Break the input into chunks of 1024 chars (= a few kB).
40+
// This simulates reading from the network.
41+
let mut input = vec![];
42+
let mut total = 0usize;
43+
while total < size {
44+
// The by_ref() call is important, otherwise we get wrong results!
45+
// See rust-lang/rust#18045.
46+
let sz = std::cmp::min(1024, size - total);
47+
input.push(stream.by_ref().take(sz).collect::<String>().to_tendril());
48+
total += sz;
49+
}
50+
51+
let test_name = format!("xml tokenizing {}", name);
52+
53+
c.bench_function(&test_name, move |b| b.iter(|| {
54+
let mut tok = XmlTokenizer::new(Sink, Default::default());
55+
let mut buffer = BufferQueue::new();
56+
// We are doing clone inside the bench function, this is not ideal, but possibly
57+
// necessary since our iterator consumes the underlying buffer.
58+
for buf in input.clone().into_iter() {
59+
buffer.push_back(buf);
60+
let _ = tok.feed(&mut buffer);
61+
}
62+
let _ = tok.feed(&mut buffer);
63+
tok.end();
64+
}));
65+
}
66+
67+
68+
69+
fn xml5ever_benchmarks(c: &mut Criterion) {
70+
run_bench(c, "strong.xml");
71+
}
72+
73+
criterion_group!(benches, xml5ever_benchmarks);
74+
criterion_main!(benches);

xml5ever/data/bench/strong.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong>

0 commit comments

Comments
 (0)