@@ -8,6 +8,95 @@ use crate::reorder_join::{
88 query_graph:: { NodeId , QueryGraph } ,
99} ;
1010
11+ /// Generates an optimized left-deep join plan from a logical plan using the Ibaraki-Kameda algorithm.
12+ ///
13+ /// This function is the main entry point for join reordering optimization. It takes a logical plan
14+ /// that may contain joins along with wrapper operators (filters, sorts, aggregations, etc.) and
15+ /// produces an optimized plan with reordered joins while preserving the wrapper operators.
16+ ///
17+ /// # Algorithm Overview
18+ ///
19+ /// The optimization process consists of several steps:
20+ ///
21+ /// 1. **Extraction**: Separates the join subtree from wrapper operators (filters, sorts, limits, etc.)
22+ /// 2. **Graph Conversion**: Converts the join subtree into a query graph representation where:
23+ /// - Nodes represent base relations (table scans, subqueries, etc.)
24+ /// - Edges represent join conditions between relations
25+ /// 3. **Optimization**: Uses the Ibaraki-Kameda algorithm to find the optimal left-deep join ordering
26+ /// by trying each node as a potential root and selecting the plan with the lowest estimated cost
27+ /// 4. **Reconstruction**: Rebuilds the complete logical plan by applying the wrapper operators
28+ /// to the optimized join plan
29+ ///
30+ /// # Left-Deep Join Plans
31+ ///
32+ /// A left-deep join plan is a join tree where:
33+ /// - Each join has a relation or previous join result on the left side
34+ /// - Each join has a single relation on the right side
35+ /// - This creates a linear "chain" of joins processed left-to-right
36+ ///
37+ /// Example: `((A ⋈ B) ⋈ C) ⋈ D` is left-deep, while `(A ⋈ B) ⋈ (C ⋈ D)` is not.
38+ ///
39+ /// Left-deep plans are preferred because they:
40+ /// - Allow pipelining of intermediate results
41+ /// - Work well with hash join implementations
42+ /// - Have predictable memory usage patterns
43+ ///
44+ /// # Arguments
45+ ///
46+ /// * `plan` - The logical plan to optimize. Must contain at least one join node.
47+ /// * `cost_estimator` - Cost estimator for calculating join costs, cardinality, and selectivity.
48+ /// Used to compare different join orderings and select the optimal one.
49+ ///
50+ /// # Returns
51+ ///
52+ /// Returns a `LogicalPlan` with optimized join ordering. The plan structure is:
53+ /// - Wrapper operators (filters, sorts, etc.) in their original positions
54+ /// - Joins reordered to minimize estimated execution cost
55+ /// - Join semantics preserved (same result set as input plan)
56+ ///
57+ /// # Errors
58+ ///
59+ /// Returns an error if:
60+ /// - The plan does not contain any join nodes
61+ /// - Join extraction fails (e.g., joins are not consecutive in the plan tree)
62+ /// - The query graph cannot be constructed from the join subtree
63+ /// - Join reordering optimization fails (no valid join ordering found)
64+ /// - Plan reconstruction fails
65+ ///
66+ /// # Example
67+ ///
68+ /// ```ignore
69+ /// use datafusion_optimizer::reorder_join::{optimal_left_deep_join_plan, cost::JoinCostEstimator};
70+ /// use std::rc::Rc;
71+ ///
72+ /// // Assume we have a plan with joins: customer ⋈ orders ⋈ lineitem
73+ /// let plan = ...; // Your logical plan
74+ /// let cost_estimator: Rc<dyn JoinCostEstimator> = Rc::new(MyCostEstimator::new());
75+ ///
76+ /// // Optimize join ordering
77+ /// let optimized = optimal_left_deep_join_plan(plan, cost_estimator)?;
78+ /// // Result might reorder to: lineitem ⋈ orders ⋈ customer (if this is cheaper)
79+ /// ```
80+ pub fn optimal_left_deep_join_plan (
81+ plan : LogicalPlan ,
82+ cost_estimator : Rc < dyn JoinCostEstimator > ,
83+ ) -> Result < LogicalPlan > {
84+ // Extract the join subtree and wrappers
85+ let ( join_subtree, wrappers) =
86+ crate :: reorder_join:: query_graph:: extract_join_subtree ( plan) ?;
87+
88+ // Convert join subtree to query graph
89+ let query_graph = QueryGraph :: try_from ( join_subtree) ?;
90+
91+ // Optimize the joins
92+ let optimized_joins =
93+ query_graph_to_optimal_left_deep_join_plan ( query_graph, cost_estimator) ?;
94+
95+ // Reconstruct the full plan with wrappers
96+
97+ crate :: reorder_join:: query_graph:: reconstruct_plan ( optimized_joins, wrappers)
98+ }
99+
11100/// Generates an optimized linear join plan from a query graph using the Ibaraki-Kameda algorithm.
12101///
13102/// This function finds the optimal join ordering for a query by:
@@ -41,7 +130,7 @@ use crate::reorder_join::{
41130/// - The query graph is empty or invalid
42131/// - Tree construction, normalization, or denormalization fails
43132/// - No valid precedence graph can be generated
44- pub fn optimal_left_deep_join_plan (
133+ pub fn query_graph_to_optimal_left_deep_join_plan (
45134 query_graph : QueryGraph ,
46135 cost_estimator : Rc < dyn JoinCostEstimator > ,
47136) -> Result < LogicalPlan > {
@@ -484,17 +573,16 @@ impl<'graph> PrecedenceTreeNode<'graph> {
484573 query_graph : & QueryGraph ,
485574 ) -> Result < LogicalPlan > {
486575 // Get the first node's logical plan
487- let mut current_node_id = self . query_nodes [ 0 ] . node_id ;
576+ let current_node_id = self . query_nodes [ 0 ] . node_id ;
488577 let mut current_plan = query_graph
489578 . get_node ( current_node_id)
490579 . ok_or_else ( || plan_datafusion_err ! ( "Node {:?} not found" , current_node_id) ) ?
491580 . plan
492581 . as_ref ( )
493582 . clone ( ) ;
494583
495- // Track all processed nodes
496- let mut processed_nodes = HashSet :: new ( ) ;
497- processed_nodes. insert ( current_node_id) ;
584+ // Track all processed nodes in order
585+ let mut processed_nodes = vec ! [ current_node_id] ;
498586
499587 // Walk down the chain, joining each subsequent node
500588 let mut current_chain = & self ;
@@ -518,6 +606,7 @@ impl<'graph> PrecedenceTreeNode<'graph> {
518606
519607 let edge = processed_nodes
520608 . iter ( )
609+ . rev ( )
521610 . find_map ( |& processed_id| {
522611 next_node. connection_with ( processed_id, query_graph)
523612 } )
@@ -542,8 +631,7 @@ impl<'graph> PrecedenceTreeNode<'graph> {
542631 } ) ;
543632
544633 // Move to the next node in the chain
545- current_node_id = next_node_id;
546- processed_nodes. insert ( next_node_id) ;
634+ processed_nodes. push ( next_node_id) ;
547635 current_chain = child;
548636 }
549637
@@ -704,21 +792,8 @@ mod tests {
704792 println ! ( "After standard optimization:" ) ;
705793 println ! ( "{}" , plan. display_indent( ) ) ;
706794
707- // Extract the join subtree and wrappers
708- let ( join_subtree, wrappers) =
709- crate :: reorder_join:: query_graph:: extract_join_subtree ( plan) . unwrap ( ) ;
710-
711- // Convert join subtree to query graph
712- let query_graph = QueryGraph :: try_from ( join_subtree) . unwrap ( ) ;
713-
714- // Optimize the joins
715- let optimized_joins =
716- optimal_left_deep_join_plan ( query_graph, Rc :: new ( TestCostEstimator ) ) . unwrap ( ) ;
717-
718- // Reconstruct the full plan with wrappers
719795 let optimized_plan =
720- crate :: reorder_join:: query_graph:: reconstruct_plan ( optimized_joins, wrappers)
721- . unwrap ( ) ;
796+ optimal_left_deep_join_plan ( plan, Rc :: new ( TestCostEstimator ) ) . unwrap ( ) ;
722797
723798 println ! ( "Optimized Plan:" ) ;
724799 println ! ( "{}" , optimized_plan. display_indent( ) ) ;
0 commit comments