Skip to content

Commit ee1e17b

Browse files
committed
remove optimizer rule
1 parent 2efdcf2 commit ee1e17b

File tree

1 file changed

+0
-156
lines changed
  • datafusion/optimizer/src/reorder_join

1 file changed

+0
-156
lines changed

datafusion/optimizer/src/reorder_join/mod.rs

Lines changed: 0 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -17,162 +17,6 @@
1717

1818
//! Optimizer rule for reordering joins to minimize query execution cost
1919
20-
use std::rc::Rc;
21-
22-
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
23-
use datafusion_common::Result;
24-
use datafusion_expr::LogicalPlan;
25-
26-
use crate::optimizer::ApplyOrder;
27-
use crate::{OptimizerConfig, OptimizerRule};
28-
2920
pub mod cost;
3021
pub mod left_deep_join_plan;
3122
pub mod query_graph;
32-
33-
use cost::JoinCostEstimator;
34-
use left_deep_join_plan::optimal_left_deep_join_plan;
35-
use query_graph::{contains_join, QueryGraph};
36-
37-
/// Optimizer rule that reorders joins to minimize query execution cost.
38-
///
39-
/// This rule identifies consecutive join operations in a query plan and reorders
40-
/// them using the Ibaraki-Kameda algorithm. The algorithm:
41-
///
42-
/// 1. Converts a join subtree into a query graph representation
43-
/// 2. Builds precedence trees to explore different join orderings
44-
/// 3. Normalizes and denormalizes the trees to find optimal orderings
45-
/// 4. Selects the ordering with the lowest estimated cost
46-
///
47-
/// The rule only reorders inner joins and requires all joins to be consecutive
48-
/// in the plan tree (no other operations between them).
49-
///
50-
/// # Example
51-
///
52-
/// Given a query plan like:
53-
/// ```text
54-
/// Join(customer.c_custkey = orders.o_custkey)
55-
/// Join(orders.o_orderkey = lineitem.l_orderkey)
56-
/// TableScan(customer)
57-
/// TableScan(orders)
58-
/// TableScan(lineitem)
59-
/// ```
60-
///
61-
/// The optimizer will evaluate different join orderings and select the one
62-
/// that minimizes intermediate result sizes and overall execution cost.
63-
#[derive(Debug)]
64-
pub struct JoinReorder {
65-
cost_estimator: Rc<dyn JoinCostEstimator>,
66-
}
67-
68-
impl JoinReorder {
69-
/// Creates a new join reorder optimizer rule with the given cost estimator
70-
pub fn new(cost_estimator: Rc<dyn JoinCostEstimator>) -> Self {
71-
Self { cost_estimator }
72-
}
73-
}
74-
75-
impl Default for JoinReorder {
76-
fn default() -> Self {
77-
Self {
78-
cost_estimator: Rc::new(cost::DefaultCostEstimator),
79-
}
80-
}
81-
}
82-
83-
impl OptimizerRule for JoinReorder {
84-
fn name(&self) -> &str {
85-
"join_reorder"
86-
}
87-
88-
fn apply_order(&self) -> Option<ApplyOrder> {
89-
// We need bottom-up traversal to process join subtrees from leaves to root
90-
Some(ApplyOrder::BottomUp)
91-
}
92-
93-
fn rewrite(
94-
&self,
95-
plan: LogicalPlan,
96-
_config: &dyn OptimizerConfig,
97-
) -> Result<Transformed<LogicalPlan>> {
98-
// Only try to reorder if this is a join node
99-
if !matches!(plan, LogicalPlan::Join(_)) {
100-
return Ok(Transformed::no(plan));
101-
}
102-
103-
// Check if this join is the root of a consecutive join subtree
104-
// (i.e., all its children are either joins or leaf nodes)
105-
if !is_join_subtree_root(&plan) {
106-
return Ok(Transformed::no(plan));
107-
}
108-
109-
// Try to convert the join subtree to a query graph and optimize it
110-
match optimize_join_subtree(plan.clone(), Rc::clone(&self.cost_estimator)) {
111-
Ok(optimized_plan) => Ok(Transformed::yes(optimized_plan)),
112-
Err(_) => {
113-
// If optimization fails (e.g., unsupported join type), return original plan
114-
Ok(Transformed::no(plan))
115-
}
116-
}
117-
}
118-
}
119-
120-
/// Checks if a plan node is the root of a consecutive join subtree.
121-
///
122-
/// A node is considered a join subtree root if:
123-
/// - It is a Join node
124-
/// - All its descendants are either Join nodes or don't contain any joins
125-
///
126-
/// This ensures we only try to optimize complete join subtrees that can be
127-
/// safely reordered without breaking other operators.
128-
fn is_join_subtree_root(plan: &LogicalPlan) -> bool {
129-
if !matches!(plan, LogicalPlan::Join(_)) {
130-
return false;
131-
}
132-
133-
// Check if all children either are joins themselves or don't contain any joins
134-
let mut all_valid = true;
135-
let _ = plan.apply_children(|child| {
136-
if matches!(child, LogicalPlan::Join(_)) {
137-
// This child is a join, continue checking down the tree
138-
Ok(TreeNodeRecursion::Continue)
139-
} else if !contains_join(child) {
140-
// This child doesn't contain any joins - it's a leaf subtree
141-
Ok(TreeNodeRecursion::Continue)
142-
} else {
143-
// Found a non-join node that contains joins - this breaks the consecutive join pattern
144-
all_valid = false;
145-
Ok(TreeNodeRecursion::Stop)
146-
}
147-
});
148-
149-
all_valid
150-
}
151-
152-
/// Optimizes a join subtree by converting it to a query graph and finding
153-
/// the optimal join ordering.
154-
///
155-
/// # Arguments
156-
///
157-
/// * `plan` - The join subtree to optimize (must be a Join node at the root)
158-
/// * `cost_estimator` - The cost estimator to use for optimization
159-
///
160-
/// # Returns
161-
///
162-
/// Returns an optimized LogicalPlan with joins reordered for minimal cost.
163-
///
164-
/// # Errors
165-
///
166-
/// Returns an error if:
167-
/// - The plan cannot be converted to a query graph
168-
/// - The optimization algorithm fails
169-
fn optimize_join_subtree(
170-
plan: LogicalPlan,
171-
cost_estimator: Rc<dyn JoinCostEstimator>,
172-
) -> Result<LogicalPlan> {
173-
// Convert the join subtree to a query graph
174-
let query_graph = QueryGraph::try_from(plan)?;
175-
176-
// Use the Ibaraki-Kameda algorithm to find the optimal join ordering
177-
optimal_left_deep_join_plan(query_graph, cost_estimator)
178-
}

0 commit comments

Comments
 (0)