Skip to content

Commit 3f70c4a

Browse files
committed
explanatory comments
1 parent f36ae17 commit 3f70c4a

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

server/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ arrow-array = { version = "53.0.0" }
1515
arrow-json = "53.0.0"
1616
arrow-ipc = { version = "53.0.0", features = ["zstd"] }
1717
arrow-select = "53.0.0"
18-
# datafusion = { git = "https://github.com/apache/datafusion.git", rev = "a64df83502821f18067fb4ff65dd217815b305c9" }
1918
datafusion = "42.0.0"
20-
object_store = { version = "0.11.0", features = ["cloud", "aws"] } # cannot update object_store as datafusion has not caught up
19+
object_store = { version = "0.11.0", features = ["cloud", "aws"] }
2120
parquet = "53.0.0"
2221
arrow-flight = { version = "53.0.0", features = [ "tls" ] }
2322
tonic = {version = "0.12.1", features = ["tls", "transport", "gzip", "zstd"] }

server/src/cli.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,9 @@ impl Cli {
421421
.help("Set a fixed memory limit for query"),
422422
)
423423
.arg(
424+
// RowGroupSize controls the number of rows present in one row group
425+
// More rows = better compression but higher memory consumption during read/write
426+
// 1048576 is the default value for DataFusion
424427
Arg::new(Self::ROW_GROUP_SIZE)
425428
.long(Self::ROW_GROUP_SIZE)
426429
.env("P_PARQUET_ROW_GROUP_SIZE")

server/src/query.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,20 @@ impl Query {
8686
.with_prefer_existing_sort(true)
8787
.with_round_robin_repartition(true);
8888

89+
// For more details, refer to https://datafusion.apache.org/user-guide/configs.html
90+
91+
// Use the Parquet page index to reduce the number of rows read (if possible)
8992
config.options_mut().execution.parquet.enable_page_index = true;
93+
94+
// `pushdown_filters` allows DF to push the filters as far down in the plan as possible
95+
// thereby reducing the number of rows decoded
9096
config.options_mut().execution.parquet.pushdown_filters = true;
97+
98+
// `reorder_filters` allows DF to choose the filter order that minimizes the cost of filter evaluation
9199
config.options_mut().execution.parquet.reorder_filters = true;
100+
101+
// Enable StringViewArray
102+
// https://www.influxdata.com/blog/faster-queries-with-stringview-part-one-influxdb/
92103
config
93104
.options_mut()
94105
.execution

0 commit comments

Comments
 (0)