Skip to content

Commit f0845a1

Browse files
committed
provide simple function interface
1 parent 2a2fe60 commit f0845a1

File tree

1 file changed

+96
-21
lines changed

1 file changed

+96
-21
lines changed

datafusion/optimizer/src/reorder_join/left_deep_join_plan.rs

Lines changed: 96 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,95 @@ use crate::reorder_join::{
88
query_graph::{NodeId, QueryGraph},
99
};
1010

11+
/// Generates an optimized left-deep join plan from a logical plan using the Ibaraki-Kameda algorithm.
12+
///
13+
/// This function is the main entry point for join reordering optimization. It takes a logical plan
14+
/// that may contain joins along with wrapper operators (filters, sorts, aggregations, etc.) and
15+
/// produces an optimized plan with reordered joins while preserving the wrapper operators.
16+
///
17+
/// # Algorithm Overview
18+
///
19+
/// The optimization process consists of several steps:
20+
///
21+
/// 1. **Extraction**: Separates the join subtree from wrapper operators (filters, sorts, limits, etc.)
22+
/// 2. **Graph Conversion**: Converts the join subtree into a query graph representation where:
23+
/// - Nodes represent base relations (table scans, subqueries, etc.)
24+
/// - Edges represent join conditions between relations
25+
/// 3. **Optimization**: Uses the Ibaraki-Kameda algorithm to find the optimal left-deep join ordering
26+
/// by trying each node as a potential root and selecting the plan with the lowest estimated cost
27+
/// 4. **Reconstruction**: Rebuilds the complete logical plan by applying the wrapper operators
28+
/// to the optimized join plan
29+
///
30+
/// # Left-Deep Join Plans
31+
///
32+
/// A left-deep join plan is a join tree where:
33+
/// - Each join has a relation or previous join result on the left side
34+
/// - Each join has a single relation on the right side
35+
/// - This creates a linear "chain" of joins processed left-to-right
36+
///
37+
/// Example: `((A ⋈ B) ⋈ C) ⋈ D` is left-deep, while `(A ⋈ B) ⋈ (C ⋈ D)` is not (it is a bushy plan).
38+
///
39+
/// Left-deep plans are preferred because they:
40+
/// - Allow pipelining of intermediate results
41+
/// - Work well with hash join implementations
42+
/// - Have predictable memory usage patterns
43+
///
44+
/// # Arguments
45+
///
46+
/// * `plan` - The logical plan to optimize. Must contain at least one join node.
47+
/// * `cost_estimator` - Cost estimator for calculating join costs, cardinality, and selectivity.
48+
/// Used to compare different join orderings and select the optimal one.
49+
///
50+
/// # Returns
51+
///
52+
/// Returns a `LogicalPlan` with optimized join ordering. The returned plan has:
53+
/// - Wrapper operators (filters, sorts, etc.) in their original positions
54+
/// - Joins reordered to minimize estimated execution cost
55+
/// - Join semantics preserved (same result set as input plan)
56+
///
57+
/// # Errors
58+
///
59+
/// Returns an error if:
60+
/// - The plan does not contain any join nodes
61+
/// - Join extraction fails (e.g., joins are not consecutive in the plan tree)
62+
/// - The query graph cannot be constructed from the join subtree
63+
/// - Join reordering optimization fails (no valid join ordering found)
64+
/// - Plan reconstruction fails
65+
///
66+
/// # Examples
67+
///
68+
/// ```ignore
69+
/// use datafusion_optimizer::reorder_join::{optimal_left_deep_join_plan, cost::JoinCostEstimator};
70+
/// use std::rc::Rc;
71+
///
72+
/// // Assume we have a plan with joins: customer ⋈ orders ⋈ lineitem
73+
/// let plan = ...; // Your logical plan
74+
/// let cost_estimator: Rc<dyn JoinCostEstimator> = Rc::new(MyCostEstimator::new());
75+
///
76+
/// // Optimize join ordering
77+
/// let optimized = optimal_left_deep_join_plan(plan, cost_estimator)?;
78+
/// // Result might reorder to: lineitem ⋈ orders ⋈ customer (if this is cheaper)
79+
/// ```
80+
pub fn optimal_left_deep_join_plan(
81+
plan: LogicalPlan,
82+
cost_estimator: Rc<dyn JoinCostEstimator>,
83+
) -> Result<LogicalPlan> {
84+
// Separate the join subtree from the wrapper operators; `?` returns early if extraction fails
85+
let (join_subtree, wrappers) =
86+
crate::reorder_join::query_graph::extract_join_subtree(plan)?;
87+
88+
// Convert the join subtree into a `QueryGraph`; the conversion is fallible (`try_from`)
89+
let query_graph = QueryGraph::try_from(join_subtree)?;
90+
91+
// Reorder the joins via the query-graph-level entry point, passing `cost_estimator` through
92+
let optimized_joins =
93+
query_graph_to_optimal_left_deep_join_plan(query_graph, cost_estimator)?;
94+
95+
// Re-apply the wrappers to the optimized joins; this call's `Result` is returned as-is
96+
97+
crate::reorder_join::query_graph::reconstruct_plan(optimized_joins, wrappers)
98+
}
99+
11100
/// Generates an optimized linear join plan from a query graph using the Ibaraki-Kameda algorithm.
12101
///
13102
/// This function finds the optimal join ordering for a query by:
@@ -41,7 +130,7 @@ use crate::reorder_join::{
41130
/// - The query graph is empty or invalid
42131
/// - Tree construction, normalization, or denormalization fails
43132
/// - No valid precedence graph can be generated
44-
pub fn optimal_left_deep_join_plan(
133+
pub fn query_graph_to_optimal_left_deep_join_plan(
45134
query_graph: QueryGraph,
46135
cost_estimator: Rc<dyn JoinCostEstimator>,
47136
) -> Result<LogicalPlan> {
@@ -484,17 +573,16 @@ impl<'graph> PrecedenceTreeNode<'graph> {
484573
query_graph: &QueryGraph,
485574
) -> Result<LogicalPlan> {
486575
// Get the first node's logical plan
487-
let mut current_node_id = self.query_nodes[0].node_id;
576+
let current_node_id = self.query_nodes[0].node_id;
488577
let mut current_plan = query_graph
489578
.get_node(current_node_id)
490579
.ok_or_else(|| plan_datafusion_err!("Node {:?} not found", current_node_id))?
491580
.plan
492581
.as_ref()
493582
.clone();
494583

495-
// Track all processed nodes
496-
let mut processed_nodes = HashSet::new();
497-
processed_nodes.insert(current_node_id);
584+
// Track all processed nodes in order
585+
let mut processed_nodes = vec![current_node_id];
498586

499587
// Walk down the chain, joining each subsequent node
500588
let mut current_chain = &self;
@@ -518,6 +606,7 @@ impl<'graph> PrecedenceTreeNode<'graph> {
518606

519607
let edge = processed_nodes
520608
.iter()
609+
.rev()
521610
.find_map(|&processed_id| {
522611
next_node.connection_with(processed_id, query_graph)
523612
})
@@ -542,8 +631,7 @@ impl<'graph> PrecedenceTreeNode<'graph> {
542631
});
543632

544633
// Move to the next node in the chain
545-
current_node_id = next_node_id;
546-
processed_nodes.insert(next_node_id);
634+
processed_nodes.push(next_node_id);
547635
current_chain = child;
548636
}
549637

@@ -704,21 +792,8 @@ mod tests {
704792
println!("After standard optimization:");
705793
println!("{}", plan.display_indent());
706794

707-
// Extract the join subtree and wrappers
708-
let (join_subtree, wrappers) =
709-
crate::reorder_join::query_graph::extract_join_subtree(plan).unwrap();
710-
711-
// Convert join subtree to query graph
712-
let query_graph = QueryGraph::try_from(join_subtree).unwrap();
713-
714-
// Optimize the joins
715-
let optimized_joins =
716-
optimal_left_deep_join_plan(query_graph, Rc::new(TestCostEstimator)).unwrap();
717-
718-
// Reconstruct the full plan with wrappers
719795
let optimized_plan =
720-
crate::reorder_join::query_graph::reconstruct_plan(optimized_joins, wrappers)
721-
.unwrap();
796+
optimal_left_deep_join_plan(plan, Rc::new(TestCostEstimator)).unwrap();
722797

723798
println!("Optimized Plan:");
724799
println!("{}", optimized_plan.display_indent());

0 commit comments

Comments
 (0)