Skip to content

Commit f90d2d4

Browse files
committed
feat: performance testing harness and perf tests for scan file plan and execute
1 parent 79711ac commit f90d2d4

File tree

14 files changed

+669
-9
lines changed

14 files changed

+669
-9
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,10 @@ jobs:
110110
rust-version: ${{ env.rust_msrv }}
111111

112112
- name: Test
113-
run: cargo test --no-fail-fast --all-targets --all-features --workspace
113+
run: cargo test --no-fail-fast --lib --examples --tests --all-features --workspace
114114

115115
- name: Async-std Test
116-
run: cargo test --no-fail-fast --all-targets --no-default-features --features "async-std" --features "storage-all" --workspace
116+
run: cargo test --no-fail-fast --lib --examples --tests --no-default-features --features "async-std" --features "storage-all" --workspace
117117

118118
- name: Doc Test
119119
run: cargo test --no-fail-fast --doc --all-features --workspace

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ Cargo.lock
2121
.vscode
2222
**/.DS_Store
2323
dist/*
24+
2425
**/venv
2526
*.so
2627
*.pyc
28+
29+
crates/iceberg/testdata/performance/raw_data/*
30+
crates/iceberg/testdata/performance/warehouse

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ check-toml: install-taplo-cli
5050
check: check-fmt check-clippy cargo-sort check-toml cargo-machete
5151

5252
doc-test:
53-
cargo test --no-fail-fast --doc --all-features --workspace
53+
cargo test --no-fail-fast --doc --lib --examples '*' --tests '*' --workspace
5454

5555
unit-test: doc-test
5656
cargo test --no-fail-fast --lib --all-features --workspace

crates/iceberg/Cargo.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,20 @@ url = { workspace = true }
7979
uuid = { workspace = true }
8080

8181
[dev-dependencies]
82+
criterion = { version = "0.3", features = ["async_tokio", "async_futures"] }
8283
ctor = { workspace = true }
84+
futures-util = "0.3"
8385
iceberg-catalog-memory = { workspace = true }
86+
iceberg-catalog-rest = { path = "../catalog/rest" }
8487
iceberg_test_utils = { path = "../test_utils", features = ["tests"] }
8588
pretty_assertions = { workspace = true }
8689
tempfile = { workspace = true }
8790
tera = { workspace = true }
91+
92+
[[bench]]
93+
name = "table_scan_plan_files"
94+
harness = false
95+
96+
[[bench]]
97+
name = "table_scan_execute_query"
98+
harness = false
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use criterion::*;
19+
use iceberg::expr::Reference;
20+
use iceberg::spec::Datum;
21+
use tokio::runtime::Runtime;
22+
23+
mod utils;
24+
use utils::{create_file_plan, create_task_stream, exec_plan, setup};
25+
26+
pub fn bench_read_all_files_all_rows(c: &mut Criterion) {
27+
let runtime = Runtime::new().unwrap();
28+
let table = setup(&runtime);
29+
let scan = table.scan().build().unwrap();
30+
let tasks = create_file_plan(&runtime, scan);
31+
32+
c.bench_function("scan: read (all files, all rows)", |b| {
33+
b.to_async(&runtime).iter_batched(
34+
|| create_task_stream(tasks.clone()),
35+
|plan| exec_plan(table.clone(), plan),
36+
BatchSize::SmallInput,
37+
)
38+
});
39+
}
40+
41+
pub fn bench_read_all_files_some_rows(c: &mut Criterion) {
42+
let runtime = Runtime::new().unwrap();
43+
let table = setup(&runtime);
44+
let scan = table
45+
.scan()
46+
.with_filter(Reference::new("passenger_count").equal_to(Datum::double(1.0)))
47+
.build()
48+
.unwrap();
49+
let tasks = create_file_plan(&runtime, scan);
50+
51+
c.bench_function("scan: read (all files, some rows)", |b| {
52+
b.to_async(&runtime).iter_batched(
53+
|| create_task_stream(tasks.clone()),
54+
|plan| exec_plan(table.clone(), plan),
55+
BatchSize::SmallInput,
56+
)
57+
});
58+
}
59+
60+
pub fn bench_read_some_files_all_rows(c: &mut Criterion) {
61+
let runtime = Runtime::new().unwrap();
62+
let table = setup(&runtime);
63+
let scan = table
64+
.scan()
65+
.with_filter(
66+
Reference::new("tpep_pickup_datetime")
67+
.greater_than_or_equal_to(
68+
Datum::timestamptz_from_str("2024-02-01T00:00:00.000 UTC").unwrap(),
69+
)
70+
.and(Reference::new("tpep_pickup_datetime").less_than(
71+
Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
72+
)),
73+
)
74+
.build()
75+
.unwrap();
76+
let tasks = create_file_plan(&runtime, scan);
77+
78+
c.bench_function("scan: read (some files, all rows)", |b| {
79+
b.to_async(&runtime).iter_batched(
80+
|| create_task_stream(tasks.clone()),
81+
|plan| exec_plan(table.clone(), plan),
82+
BatchSize::SmallInput,
83+
)
84+
});
85+
}
86+
87+
pub fn bench_read_some_files_some_rows(c: &mut Criterion) {
88+
let runtime = Runtime::new().unwrap();
89+
let table = setup(&runtime);
90+
let scan =
91+
table
92+
.scan()
93+
.with_filter(
94+
Reference::new("tpep_pickup_datetime")
95+
.greater_than_or_equal_to(
96+
Datum::timestamptz_from_str("2024-02-01T00:00:00.000 UTC").unwrap(),
97+
)
98+
.and(Reference::new("tpep_pickup_datetime").less_than(
99+
Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
100+
))
101+
.and(Reference::new("passenger_count").equal_to(Datum::double(1.0))),
102+
)
103+
.build()
104+
.unwrap();
105+
let tasks = create_file_plan(&runtime, scan);
106+
107+
c.bench_function("scan: read (some files, some rows)", |b| {
108+
b.to_async(&runtime).iter_batched(
109+
|| create_task_stream(tasks.clone()),
110+
|plan| exec_plan(table.clone(), plan),
111+
BatchSize::SmallInput,
112+
)
113+
});
114+
}
115+
116+
criterion_group! {
117+
name = benches;
118+
config = Criterion::default().sample_size(10);
119+
targets = bench_read_some_files_some_rows, bench_read_some_files_all_rows, bench_read_all_files_some_rows, bench_read_all_files_all_rows
120+
}
121+
122+
criterion_main!(benches);
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use criterion::*;
19+
use futures_util::StreamExt;
20+
use iceberg::expr::Reference;
21+
use iceberg::spec::Datum;
22+
use iceberg::table::Table;
23+
use tokio::runtime::Runtime;
24+
mod utils;
25+
use utils::setup;
26+
27+
async fn all_files_all_rows(table: &Table) {
28+
let scan = table.scan().build().unwrap();
29+
let mut stream = scan.plan_files().await.unwrap();
30+
31+
while let Some(item) = stream.next().await {
32+
black_box(item.unwrap());
33+
}
34+
}
35+
36+
async fn one_file_all_rows(table: &Table) {
37+
let scan = table
38+
.scan()
39+
.with_filter(
40+
Reference::new("tpep_pickup_datetime")
41+
.greater_than_or_equal_to(
42+
Datum::timestamptz_from_str("2024-02-01T00:00:00.000 UTC").unwrap(),
43+
)
44+
.and(Reference::new("tpep_pickup_datetime").less_than(
45+
Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
46+
)),
47+
)
48+
.build()
49+
.unwrap();
50+
let mut stream = scan.plan_files().await.unwrap();
51+
52+
while let Some(item) = stream.next().await {
53+
black_box(item.unwrap());
54+
}
55+
}
56+
57+
async fn all_files_some_rows(table: &Table) {
58+
let scan = table
59+
.scan()
60+
.with_filter(Reference::new("passenger_count").equal_to(Datum::double(1.0)))
61+
.build()
62+
.unwrap();
63+
let mut stream = scan.plan_files().await.unwrap();
64+
65+
while let Some(item) = stream.next().await {
66+
black_box(item.unwrap());
67+
}
68+
}
69+
70+
async fn one_file_some_rows(table: &Table) {
71+
let scan =
72+
table
73+
.scan()
74+
.with_filter(
75+
Reference::new("tpep_pickup_datetime")
76+
.greater_than_or_equal_to(
77+
Datum::timestamptz_from_str("2024-02-01T00:00:00.000 UTC").unwrap(),
78+
)
79+
.and(Reference::new("tpep_pickup_datetime").less_than(
80+
Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
81+
))
82+
.and(Reference::new("passenger_count").equal_to(Datum::double(1.0))),
83+
)
84+
.build()
85+
.unwrap();
86+
let mut stream = scan.plan_files().await.unwrap();
87+
88+
while let Some(item) = stream.next().await {
89+
black_box(item.unwrap());
90+
}
91+
}
92+
93+
pub fn bench_all_files_all_rows(c: &mut Criterion) {
94+
let runtime = Runtime::new().unwrap();
95+
let table = setup(&runtime);
96+
println!("setup complete");
97+
98+
c.bench_function("scan: plan (all files, all rows)", |b| {
99+
b.to_async(&runtime).iter(|| all_files_all_rows(&table))
100+
});
101+
}
102+
103+
pub fn bench_one_file_all_rows(c: &mut Criterion) {
104+
let runtime = Runtime::new().unwrap();
105+
let table = setup(&runtime);
106+
println!("setup complete");
107+
108+
c.bench_function("scan: plan (one file, all rows)", |b| {
109+
b.to_async(&runtime).iter(|| one_file_all_rows(&table))
110+
});
111+
}
112+
113+
pub fn bench_all_files_some_rows(c: &mut Criterion) {
114+
let runtime = Runtime::new().unwrap();
115+
let table = setup(&runtime);
116+
println!("setup complete");
117+
118+
c.bench_function("scan: plan (all files, some rows)", |b| {
119+
b.to_async(&runtime).iter(|| all_files_some_rows(&table))
120+
});
121+
}
122+
123+
pub fn bench_one_file_some_rows(c: &mut Criterion) {
124+
let runtime = Runtime::new().unwrap();
125+
let table = setup(&runtime);
126+
println!("setup complete");
127+
128+
c.bench_function("scan: plan (one file, some rows)", |b| {
129+
b.to_async(&runtime).iter(|| one_file_some_rows(&table))
130+
});
131+
}
132+
133+
criterion_group! {
134+
name = benches;
135+
config = Criterion::default().sample_size(10);
136+
targets = bench_all_files_all_rows, bench_all_files_some_rows, bench_one_file_all_rows, bench_one_file_some_rows
137+
}
138+
139+
criterion_main!(benches);

0 commit comments

Comments
 (0)