Skip to content

Commit a1b6c9b

Browse files
committed
feat: performance testing harness and perf tests for scan file plan
1 parent ab4f69a commit a1b6c9b

File tree

13 files changed

+685
-13
lines changed

13 files changed

+685
-13
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,6 @@
2121
.vscode
2222
**/.DS_Store
2323
dist/*
24+
crates/iceberg/testdata/performance/raw_data/*
25+
26+
crates/iceberg/testdata/performance/warehouse

crates/iceberg/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,15 @@ url = { workspace = true }
7979
uuid = { workspace = true }
8080

8181
[dev-dependencies]
82+
criterion = { version = "0.3", features = ["async_tokio", "async_futures"] }
8283
ctor = { workspace = true }
84+
futures-util = "0.3"
85+
iceberg-catalog-rest = { path = "../catalog/rest" }
8386
iceberg_test_utils = { path = "../test_utils", features = ["tests"] }
8487
pretty_assertions = { workspace = true }
8588
tempfile = { workspace = true }
8689
tera = { workspace = true }
90+
91+
[[bench]]
92+
name = "table_scan_plan_files"
93+
harness = false
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use criterion::*;
19+
use futures_util::StreamExt;
20+
use tokio::runtime::Runtime;
21+
22+
mod utils;
23+
use utils::build_catalog;
24+
25+
use iceberg::expr::Reference;
26+
use iceberg::spec::Datum;
27+
use iceberg::table::Table;
28+
use iceberg::Catalog;
29+
30+
async fn setup_async() -> Table {
31+
let catalog = build_catalog().await;
32+
let namespaces = catalog.list_namespaces(None).await.unwrap();
33+
let table_idents = catalog.list_tables(&namespaces[0]).await.unwrap();
34+
catalog.load_table(&table_idents[0]).await.unwrap()
35+
}
36+
37+
fn setup(runtime: &Runtime) -> Table {
38+
runtime.block_on(setup_async())
39+
}
40+
41+
async fn all_files_all_rows(table: &Table) {
42+
let scan = table.scan().build().unwrap();
43+
let mut stream = scan.plan_files().await.unwrap();
44+
45+
while let Some(item) = stream.next().await {
46+
black_box(item.unwrap());
47+
}
48+
}
49+
50+
async fn one_file_all_rows(table: &Table) {
51+
let scan = table
52+
.scan()
53+
.with_filter(
54+
Reference::new("tpep_pickup_datetime")
55+
.greater_than_or_equal_to(
56+
Datum::timestamptz_from_str("2024-02-01T00:00:00.000 UTC").unwrap(),
57+
)
58+
.and(Reference::new("tpep_pickup_datetime").less_than(
59+
Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
60+
)),
61+
)
62+
.build()
63+
.unwrap();
64+
let mut stream = scan.plan_files().await.unwrap();
65+
66+
while let Some(item) = stream.next().await {
67+
black_box(item.unwrap());
68+
}
69+
}
70+
71+
async fn all_files_some_rows(table: &Table) {
72+
let scan = table
73+
.scan()
74+
.with_filter(Reference::new("passenger_count").equal_to(Datum::double(1.0)))
75+
.build()
76+
.unwrap();
77+
let mut stream = scan.plan_files().await.unwrap();
78+
79+
while let Some(item) = stream.next().await {
80+
black_box(item.unwrap());
81+
}
82+
}
83+
84+
async fn one_file_some_rows(table: &Table) {
85+
let scan = table
86+
.scan()
87+
.with_filter(
88+
Reference::new("tpep_pickup_datetime")
89+
.greater_than_or_equal_to(
90+
Datum::timestamptz_from_str("2024-02-01T00:00:00.000 UTC").unwrap(),
91+
)
92+
.and(Reference::new("tpep_pickup_datetime").less_than(
93+
Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
94+
))
95+
.and(Reference::new("passenger_count").equal_to(Datum::double(1.0))),
96+
)
97+
.build()
98+
.unwrap();
99+
let mut stream = scan.plan_files().await.unwrap();
100+
101+
while let Some(item) = stream.next().await {
102+
black_box(item.unwrap());
103+
}
104+
}
105+
106+
pub fn bench_all_files_all_rows(c: &mut Criterion) {
107+
let runtime = Runtime::new().unwrap();
108+
let table = setup(&runtime);
109+
println!("setup complete");
110+
111+
c.bench_function("all_files_all_rows", |b| {
112+
b.to_async(&runtime).iter(|| all_files_all_rows(&table))
113+
});
114+
}
115+
116+
pub fn bench_one_file_all_rows(c: &mut Criterion) {
117+
let runtime = Runtime::new().unwrap();
118+
let table = setup(&runtime);
119+
println!("setup complete");
120+
121+
c.bench_function("one_file_all_rows", |b| {
122+
b.to_async(&runtime).iter(|| one_file_all_rows(&table))
123+
});
124+
}
125+
126+
pub fn bench_all_files_some_rows(c: &mut Criterion) {
127+
let runtime = Runtime::new().unwrap();
128+
let table = setup(&runtime);
129+
println!("setup complete");
130+
131+
c.bench_function("all_files_some_rows", |b| {
132+
b.to_async(&runtime).iter(|| all_files_some_rows(&table))
133+
});
134+
}
135+
136+
pub fn bench_one_file_some_rows(c: &mut Criterion) {
137+
let runtime = Runtime::new().unwrap();
138+
let table = setup(&runtime);
139+
println!("setup complete");
140+
141+
c.bench_function("one_file_some_rows", |b| {
142+
b.to_async(&runtime).iter(|| one_file_some_rows(&table))
143+
});
144+
}
145+
146+
criterion_group! {
147+
name = benches;
148+
config = Criterion::default().sample_size(10);
149+
targets = bench_all_files_all_rows, bench_all_files_some_rows, bench_one_file_all_rows, bench_one_file_some_rows
150+
}
151+
152+
criterion_main!(benches);

crates/iceberg/benches/utils.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
19+
use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
20+
use iceberg_test_utils::docker::DockerCompose;
21+
use std::collections::HashMap;
22+
23+
pub fn get_docker_compose() -> DockerCompose {
24+
DockerCompose::new(
25+
"iceberg-rust-performance",
26+
format!(
27+
"{}/../iceberg/testdata/performance",
28+
env!("CARGO_MANIFEST_DIR")
29+
),
30+
)
31+
}
32+
33+
pub async fn build_catalog() -> RestCatalog {
34+
let docker_compose = get_docker_compose();
35+
36+
// determine which ports on the host that docker has exposed the specified port to for the given containers
37+
let rest_api_host_port = docker_compose.get_host_port("rest", 8181);
38+
let haproxy_host_port = docker_compose.get_host_port("haproxy", 9080);
39+
40+
let catalog_uri = format!("http://localhost:{}", rest_api_host_port);
41+
let haproxy_uri = format!("http://localhost:{}", haproxy_host_port);
42+
43+
let user_props = HashMap::from_iter(
44+
vec![
45+
(S3_ENDPOINT.to_string(), haproxy_uri),
46+
(S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
47+
(S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
48+
(S3_REGION.to_string(), "us-east-1".to_string()),
49+
]
50+
.into_iter(),
51+
);
52+
53+
RestCatalog::new(
54+
RestCatalogConfig::builder()
55+
.uri(catalog_uri)
56+
.props(user_props)
57+
.build(),
58+
)
59+
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
version: "3"
19+
20+
services:
21+
spark-iceberg:
22+
image: tabulario/spark-iceberg
23+
build: spark/
24+
networks:
25+
iceberg_perf:
26+
depends_on:
27+
- rest
28+
- minio
29+
volumes:
30+
- ./warehouse:/home/iceberg/warehouse
31+
- ./notebooks:/home/iceberg/notebooks/notebooks
32+
- ./raw_data:/home/iceberg/raw_data
33+
- ./spark_scripts:/home/iceberg/spark_scripts
34+
environment:
35+
- AWS_ACCESS_KEY_ID=admin
36+
- AWS_SECRET_ACCESS_KEY=password
37+
- AWS_REGION=us-east-1
38+
ports:
39+
- 8888:8888
40+
- 8080:8080
41+
- 10000:10000
42+
- 10001:10001
43+
44+
rest:
45+
image: tabulario/iceberg-rest
46+
networks:
47+
iceberg_perf:
48+
ports:
49+
- 8181:8181
50+
volumes:
51+
- ./warehouse:/warehouse
52+
environment:
53+
- AWS_ACCESS_KEY_ID=admin
54+
- AWS_SECRET_ACCESS_KEY=password
55+
- AWS_REGION=us-east-1
56+
- CATALOG_WAREHOUSE=s3://warehouse/
57+
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
58+
- CATALOG_S3_ENDPOINT=http://minio:9000
59+
- CATALOG_URI=jdbc:sqlite:file:/warehouse/catalog.sqlite
60+
61+
minio:
62+
image: minio/minio
63+
environment:
64+
- MINIO_ROOT_USER=admin
65+
- MINIO_ROOT_PASSWORD=password
66+
- MINIO_DOMAIN=minio
67+
networks:
68+
iceberg_perf:
69+
aliases:
70+
- warehouse.minio
71+
volumes:
72+
- ./warehouse:/warehouse
73+
ports:
74+
- 9001:9001
75+
- 9000:9000
76+
command: ["server", "/warehouse", "--console-address", ":9001"]
77+
78+
mc:
79+
depends_on:
80+
- minio
81+
image: minio/mc
82+
container_name: mc
83+
networks:
84+
iceberg_perf:
85+
environment:
86+
- AWS_ACCESS_KEY_ID=admin
87+
- AWS_SECRET_ACCESS_KEY=password
88+
- AWS_REGION=us-east-1
89+
entrypoint: >
90+
/bin/sh -c "
91+
until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
92+
/usr/bin/mc mb minio/warehouse;
93+
/usr/bin/mc policy set public minio/warehouse;
94+
tail -f /dev/null
95+
"
96+
97+
haproxy:
98+
image: haproxy:lts-bookworm
99+
networks:
100+
iceberg_perf:
101+
ports:
102+
- 9080
103+
volumes:
104+
- type: bind
105+
source: ./haproxy.cfg
106+
target: /usr/local/etc/haproxy/haproxy.cfg
107+
read_only: true
108+
depends_on:
109+
- minio
110+
111+
networks:
112+
iceberg_perf:
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
global
19+
log stdout format raw local0 debug
20+
fd-hard-limit 50000
21+
22+
defaults
23+
mode http
24+
timeout client 10s
25+
timeout connect 5s
26+
timeout server 10s
27+
timeout http-request 10s
28+
log global
29+
30+
frontend slowio
31+
bind :9080
32+
default_backend minio
33+
tcp-request inspect-delay 100ms
34+
tcp-request content accept if WAIT_END
35+
filter bwlim-out mylimit default-limit 625000 default-period 1s
36+
37+
backend minio
38+
# server s1 minio.iceberg-performance-tests.orb.local:9000 check maxconn 4
39+
server s1 minio:9000 check maxconn 4

0 commit comments

Comments
 (0)