Skip to content

Add criterion.rs to html5ever/xml5ever #360

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions html5ever/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "html5ever"
version = "0.22.5"
version = "0.22.6"
authors = [ "The html5ever Project Developers" ]
license = "MIT / Apache-2.0"
repository = "https://github.com/servo/html5ever"
Expand All @@ -26,10 +26,6 @@ harness = false
[[test]]
name = "serializer"

[[bench]]
name = "tokenizer"
harness = false

[dependencies]
log = "0.4"
mac = "0.1"
Expand All @@ -39,8 +35,13 @@ markup5ever = { version = "0.7", path = "../markup5ever" }
rustc-serialize = "0.3.15"
rustc-test = "0.3"
typed-arena = "1.3.0"
criterion = "0.2"

[build-dependencies]
quote = "0.6"
syn = { version = "0.15", features = ["extra-traits", "full", "fold"] }
proc-macro2 = "0.4"

[[bench]]
name = "html5ever"
harness = false
80 changes: 80 additions & 0 deletions html5ever/benches/html5ever.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#[macro_use]
extern crate criterion;
extern crate html5ever;

use std::fs;
use std::path::PathBuf;

use criterion::{Criterion, black_box};

use html5ever::tokenizer::{BufferQueue, TokenSink, Token, Tokenizer, TokenizerOpts, TokenSinkResult};
use html5ever::tendril::*;

/// A token sink that throws every token away.
///
/// The benchmark only measures tokenization, so tokens are discarded —
/// but each one is laundered through `black_box` first so the optimizer
/// cannot prove the tokenizer's output unused and delete the work.
struct Sink;

impl TokenSink for Sink {
    type Handle = ();

    fn process_token(&mut self, tok: Token, _line: u64) -> TokenSinkResult<()> {
        // Keep the token observable to the optimizer, then drop it.
        let _ = black_box(tok);
        TokenSinkResult::Continue
    }
}


/// Register one Criterion benchmark that tokenizes the file `name`
/// from `data/bench/` under the crate root.
///
/// The file is read once up front and split into ~1024-char chunks
/// (simulating reads from the network); each benchmark iteration then
/// tokenizes the whole chunk sequence from scratch with a fresh
/// `Tokenizer`.
fn run_bench(c: &mut Criterion, name: &str) {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    path.push("data/bench/");
    path.push(name);
    // `.expect` on the Result keeps the underlying io::Error in the
    // panic message; the previous `.ok().expect(..)` discarded it.
    let mut file = fs::File::open(&path).expect("can't open file");

    // Read the file and treat it as an infinitely repeating sequence of characters.
    let mut file_input = ByteTendril::new();
    file.read_to_tendril(&mut file_input).expect("can't read file");
    let file_input: StrTendril = file_input.try_reinterpret().unwrap();
    // NOTE(review): `len()` is a byte count but the chunking below counts
    // chars, so non-ASCII inputs (e.g. lipsum-zh) benchmark slightly more
    // than one file's worth of text. Harmless for relative measurements.
    let size = file_input.len();
    let mut stream = file_input.chars().cycle();

    // Break the input into chunks of 1024 chars (= a few kB).
    // This simulates reading from the network.
    let mut input = vec![];
    let mut total = 0usize;
    while total < size {
        // The by_ref() call is important, otherwise we get wrong results!
        // See rust-lang/rust#18045.
        let sz = std::cmp::min(1024, size - total);
        input.push(stream.by_ref().take(sz).collect::<String>().to_tendril());
        total += sz;
    }

    let test_name = format!("html tokenizing {}", name);

    c.bench_function(&test_name, move |b| {
        b.iter(|| {
            let mut tok = Tokenizer::new(Sink, Default::default());
            let mut buffer = BufferQueue::new();
            // Cloning per iteration is not ideal but necessary: feeding
            // consumes the buffers. The cost is identical for every run.
            for buf in input.clone() {
                buffer.push_back(buf);
                let _ = tok.feed(&mut buffer);
            }
            // Drain anything still pending before finishing.
            let _ = tok.feed(&mut buffer);
            tok.end();
        })
    });
}



// Register one tokenizer benchmark per sample document in data/bench/.
fn html5ever_benchmark(c: &mut Criterion) {
// Lorem-ipsum filler text; -zh presumably a Chinese variant exercising
// multi-byte UTF-8 input — TODO confirm against the data files.
run_bench(c, "lipsum.html");
run_bench(c, "lipsum-zh.html");
// Realistic markup fragments of decreasing size.
run_bench(c, "medium-fragment.html");
run_bench(c, "small-fragment.html");
run_bench(c, "tiny-fragment.html");
run_bench(c, "strong.html");
}

// Criterion entry points: collect the benchmarks into a group and
// generate the main() function that runs them.
criterion_group!(benches, html5ever_benchmark);
criterion_main!(benches);
157 changes: 0 additions & 157 deletions html5ever/benches/tokenizer.rs

This file was deleted.

4 changes: 2 additions & 2 deletions scripts/travis-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ set -ex

if [ $TRAVIS_RUST_VERSION = nightly ]
then
cargo test -p html5ever --features "rustc-test/capture" --bench tokenizer
cargo bench --all
cargo test -p html5ever --features "rustc-test/capture"
cargo test -p xml5ever --features "rustc-test/capture"
else
cargo test -p html5ever --bench tokenizer
cargo bench --all
cargo test --all
fi

Expand Down
7 changes: 6 additions & 1 deletion xml5ever/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "xml5ever"
version = "0.12.1"
version = "0.13.0"
authors = ["The xml5ever project developers"]
license = "MIT / Apache-2.0"
repository = "https://github.com/servo/html5ever"
Expand All @@ -28,3 +28,8 @@ markup5ever = {version = "0.7", path = "../markup5ever" }
[dev-dependencies]
rustc-serialize = "0.3.15"
rustc-test = "0.3"
criterion = "0.2"

[[bench]]
name = "xml5ever"
harness = false
74 changes: 74 additions & 0 deletions xml5ever/benches/xml5ever.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#[macro_use]
extern crate criterion;
extern crate xml5ever;
extern crate markup5ever;

use std::fs;
use std::path::PathBuf;

use criterion::{Criterion, black_box};

use markup5ever::buffer_queue::BufferQueue;
use xml5ever::tokenizer::{TokenSink, Token, XmlTokenizer, XmlTokenizerOpts};
use xml5ever::tendril::*;

/// A token sink that throws every token away.
///
/// The benchmark only measures tokenization, so tokens are discarded —
/// but each one is laundered through `black_box` first so the optimizer
/// cannot prove the tokenizer's output unused and delete the work.
struct Sink;

impl TokenSink for Sink {
    fn process_token(&mut self, tok: Token) {
        // Keep the token observable to the optimizer, then drop it.
        let _ = black_box(tok);
    }
}


/// Register one Criterion benchmark that tokenizes the file `name`
/// from `data/bench/` under the crate root.
///
/// The file is read once up front and split into ~1024-char chunks
/// (simulating reads from the network); each benchmark iteration then
/// tokenizes the whole chunk sequence from scratch with a fresh
/// `XmlTokenizer`.
fn run_bench(c: &mut Criterion, name: &str) {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    path.push("data/bench/");
    path.push(name);
    // `.expect` on the Result keeps the underlying io::Error in the
    // panic message; the previous `.ok().expect(..)` discarded it.
    let mut file = fs::File::open(&path).expect("can't open file");

    // Read the file and treat it as an infinitely repeating sequence of characters.
    let mut file_input = ByteTendril::new();
    file.read_to_tendril(&mut file_input).expect("can't read file");
    let file_input: StrTendril = file_input.try_reinterpret().unwrap();
    // NOTE(review): `len()` is a byte count but the chunking below counts
    // chars, so non-ASCII inputs would benchmark slightly more than one
    // file's worth of text. Harmless for relative measurements.
    let size = file_input.len();
    let mut stream = file_input.chars().cycle();

    // Break the input into chunks of 1024 chars (= a few kB).
    // This simulates reading from the network.
    let mut input = vec![];
    let mut total = 0usize;
    while total < size {
        // The by_ref() call is important, otherwise we get wrong results!
        // See rust-lang/rust#18045.
        let sz = std::cmp::min(1024, size - total);
        input.push(stream.by_ref().take(sz).collect::<String>().to_tendril());
        total += sz;
    }

    let test_name = format!("xml tokenizing {}", name);

    c.bench_function(&test_name, move |b| {
        b.iter(|| {
            let mut tok = XmlTokenizer::new(Sink, Default::default());
            let mut buffer = BufferQueue::new();
            // Cloning per iteration is not ideal but necessary: feeding
            // consumes the buffers. The cost is identical for every run.
            for buf in input.clone() {
                buffer.push_back(buf);
                let _ = tok.feed(&mut buffer);
            }
            // Drain anything still pending before finishing.
            let _ = tok.feed(&mut buffer);
            tok.end();
        })
    });
}



// Register the XML tokenizer benchmarks; only one sample document so far.
fn xml5ever_benchmarks(c: &mut Criterion) {
run_bench(c, "strong.xml");
}

// Criterion entry points: collect the benchmarks into a group and
// generate the main() function that runs them.
criterion_group!(benches, xml5ever_benchmarks);
criterion_main!(benches);
1 change: 1 addition & 0 deletions xml5ever/data/bench/strong.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong><strong>
Loading