Skip to content

Commit a7512d7

Browse files
committed
test: update perf test table config and benches to be more useful when measuring performance of row group filtering and row selection
1 parent 37459d0 commit a7512d7

File tree

3 files changed

+10
-5
lines changed

3 files changed

+10
-5
lines changed

crates/iceberg/benches/table_scan_execute_query.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ pub fn bench_read_all_files_some_rows(c: &mut Criterion) {
43 43
let table = setup(&runtime);
44 44
let scan = table
45 45
.scan()
46-
.with_filter(Reference::new("passenger_count").greater_than(Datum::double(1.0)))
46+
.with_filter(Reference::new("fare_amount").greater_than(Datum::double(50.0)))
47 47
.build()
48 48
.unwrap();
49 49
let tasks = create_file_plan(&runtime, scan);
@@ -98,7 +98,7 @@ pub fn bench_read_some_files_some_rows(c: &mut Criterion) {
98 98
.and(Reference::new("tpep_pickup_datetime").less_than(
99 99
Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
100 100
))
101-
.and(Reference::new("passenger_count").greater_than(Datum::double(1.0))),
101+
.and(Reference::new("fare_amount").greater_than(Datum::double(50.0))),
102 102
)
103 103
.build()
104 104
.unwrap();

crates/iceberg/benches/table_scan_plan_files.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ async fn one_file_all_rows(table: &Table) {
57 57
async fn all_files_some_rows(table: &Table) {
58 58
let scan = table
59 59
.scan()
60-
.with_filter(Reference::new("passenger_count").equal_to(Datum::double(1.0)))
60+
.with_filter(Reference::new("fare_amount").equal_to(Datum::double(50.0)))
61 61
.build()
62 62
.unwrap();
63 63
let mut stream = scan.plan_files().await.unwrap();
@@ -79,7 +79,7 @@ async fn one_file_some_rows(table: &Table) {
79 79
.and(Reference::new("tpep_pickup_datetime").less_than(
80 80
Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
81 81
))
82-
.and(Reference::new("passenger_count").equal_to(Datum::double(1.0))),
82+
.and(Reference::new("fare_amount").equal_to(Datum::double(50.0))),
83 83
)
84 84
.build()
85 85
.unwrap();

crates/iceberg/testdata/performance/spark_scripts/setup.sql

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,12 @@ CREATE TABLE IF NOT EXISTS nyc.taxis (
42 42
USING iceberg
43 43
PARTITIONED BY (days(tpep_pickup_datetime));
44 44

45-
ALTER TABLE nyc.taxis WRITE ORDERED BY tpep_pickup_datetime, passenger_count;
45+
46+
ALTER TABLE nyc.taxis SET TBLPROPERTIES (
47+
'write.parquet.row-group-size-bytes'='131072',
48+
'write.parquet.page-row-limit'='200'
49+
);
50+
ALTER TABLE nyc.taxis WRITE DISTRIBUTED BY PARTITION LOCALLY ORDERED BY fare_amount;
46 51

47 52
CREATE TEMPORARY VIEW parquetTable
48 53
USING org.apache.spark.sql.parquet

0 commit comments

Comments
 (0)