From 1a398ab25ea6e2d979531859330d399ad7b2bd83 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Tue, 10 Sep 2024 18:34:37 +0900 Subject: [PATCH 1/2] switch to a new CFG selection logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit aims to port the new CFG selection logic implemented in aviatesk/JET.jl#654 to LCU, so that it can be shared between LCU and JET. The new algorithm is based on what was proposed in [Wei84][^Wei84]. If there is even one active block in the blocks reachable from a conditional branch up to its successors' nearest common post-dominator (referred to as "𝑰𝑵𝑭𝑳" in the paper), it is necessary to follow that conditional branch and execute the code. Otherwise, execution can be short-circuited[^short-circuit] from the conditional branch to the nearest common post-dominator. Regarding `GotoNode`s, they are now marked only in active blocks, after all requirements have converged, rather than inside `add_loops!` or similar passes. This approach eliminates the need to add unnecessary blocks inside the loop, and the need to use `add_loops!` at all, while still allowing the required CFG to be executed safely. [^Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984. https://ieeexplore.ieee.org/document/5010248 [^short-circuit]: It is important to note that in Julia's IR (`CodeInfo`), "short-circuiting" a specific code region is not a simple task. Simply ignoring the path to the post-dominator does not guarantee fall-through to the post-dominator. Therefore, a more careful implementation is required for this aspect. 
--- src/codeedges.jl | 175 +++++++++++++++++++++++++++++++++++----------- src/domtree.jl | 27 +++++++ src/packagedef.jl | 4 +- 3 files changed, 165 insertions(+), 41 deletions(-) diff --git a/src/codeedges.jl b/src/codeedges.jl index 93b07bd..eb1cfd6 100644 --- a/src/codeedges.jl +++ b/src/codeedges.jl @@ -108,7 +108,7 @@ function print_with_code(preprint, postprint, io::IO, src::CodeInfo) :displaysize=>displaysize(io), :SOURCE_SLOTNAMES => Base.sourceinfo_slotnames(src)) used = BitSet() - cfg = Core.Compiler.compute_basic_blocks(src.code) + cfg = compute_basic_blocks(src.code) for stmt in src.code Core.Compiler.scan_ssa_use!(push!, used, stmt) end @@ -629,8 +629,7 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, objs = add_requests!(isrequired, objs, edges, norequire) # Compute basic blocks, which we'll use to make sure we mark necessary control-flow - cfg = Core.Compiler.compute_basic_blocks(src.code) # needed for control-flow analysis - domtree = construct_domtree(cfg.blocks) + cfg = compute_basic_blocks(src.code) # needed for control-flow analysis postdomtree = construct_postdomtree(cfg.blocks) # We'll mostly use generic graph traversal to discover all the lines we need, @@ -651,7 +650,7 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, # Add control-flow changed |= add_loops!(isrequired, cfg) - changed |= add_control_flow!(isrequired, cfg, domtree, postdomtree) + changed |= add_control_flow!(isrequired, src, cfg, postdomtree) # So far, everything is generic graph traversal. 
Now we add some domain-specific information changed |= add_typedefs!(isrequired, src, edges, typedefs, norequire) @@ -659,6 +658,10 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, iter += 1 # just for diagnostics end + + # now mark the active goto nodes + add_active_gotos!(isrequired, src, cfg, postdomtree) + return isrequired end @@ -752,48 +755,140 @@ function add_loops!(isrequired, cfg) return changed end -function add_control_flow!(isrequired, cfg, domtree, postdomtree) - changed, _changed = false, true - blocks = cfg.blocks - nblocks = length(blocks) - while _changed - _changed = false - for (ibb, bb) in enumerate(blocks) - r = rng(bb) - if any(view(isrequired, r)) - # Walk up the dominators - jbb = ibb - while jbb != 1 - jdbb = domtree.idoms_bb[jbb] - dbb = blocks[jdbb] - # Check the successors; if jbb doesn't post-dominate, mark the last statement - for s in dbb.succs - if !postdominates(postdomtree, jbb, s) - idxlast = rng(dbb)[end] - _changed |= !isrequired[idxlast] - isrequired[idxlast] = true - break - end - end - jbb = jdbb +using Core: CodeInfo +using Core.Compiler: CFG, BasicBlock, compute_basic_blocks + +# The goal of this function is to request concretization of the minimal necessary control +# flow to evaluate statements whose concretization have already been requested. +# The basic algorithm is based on what was proposed in [^Wei84]. If there is even one active +# block in the blocks reachable from a conditional branch up to its successors' nearest +# common post-dominator (referred to as 𝑰𝑵𝑭𝑳 in the paper), it is necessary to follow +# that conditional branch and execute the code. Otherwise, execution can be short-circuited +# from the conditional branch to the nearest common post-dominator. +# +# COMBAK: It is important to note that in Julia's intermediate code representation (`CodeInfo`), +# "short-circuiting" a specific code region is not a simple task. 
Simply ignoring the path +# to the post-dominator does not guarantee fall-through to the post-dominator. Therefore, +# a more careful implementation is required for this aspect. +# +# [Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984. +function add_control_flow!(isrequired, src::CodeInfo, cfg::CFG, postdomtree) + local changed::Bool = false + function mark_isrequired!(idx::Int) + if !isrequired[idx] + changed |= isrequired[idx] = true + return true + end + return false + end + for bbidx = 1:length(cfg.blocks) # forward traversal + bb = cfg.blocks[bbidx] + nsuccs = length(bb.succs) + if nsuccs == 0 + continue + elseif nsuccs == 1 + continue # leave a fall-through terminator unmarked: `GotoNode`s are marked later + elseif nsuccs == 2 + termidx = bb.stmts[end] + @assert is_conditional_terminator(src.code[termidx]) "invalid IR" + if is_conditional_block_active(isrequired, bb, cfg, postdomtree) + mark_isrequired!(termidx) + else + # fall-through to the post dominator block (by short-circuiting all statements between) + end + end + end + return changed +end + +is_conditional_terminator(@nospecialize stmt) = stmt isa GotoIfNot || + (@static @isdefined(EnterNode) ? 
stmt isa EnterNode : isexpr(stmt, :enter)) + +function is_conditional_block_active(isrequired, bb::BasicBlock, cfg::CFG, postdomtree) + return visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet + for blk in 𝑰𝑵𝑭𝑳 + if blk == postdominator + continue # skip the post-dominator block and continue to a next infl block + end + if any(@view isrequired[cfg.blocks[blk].stmts]) + return true + end + end + return false + end +end + +function visit_𝑰𝑵𝑭𝑳_blocks(func, bb::BasicBlock, cfg::CFG, postdomtree) + succ1, succ2 = bb.succs + postdominator = nearest_common_dominator(postdomtree, succ1, succ2) + 𝑰𝑵𝑭𝑳 = reachable_blocks(cfg, succ1, postdominator) ∪ reachable_blocks(cfg, succ2, postdominator) + return func(postdominator, 𝑰𝑵𝑭𝑳) +end + +function reachable_blocks(cfg, from_bb::Int, to_bb::Int) + worklist = Int[from_bb] + visited = BitSet(from_bb) + if to_bb == from_bb + return visited + end + push!(visited, to_bb) + function visit!(bb::Int) + if bb ∉ visited + push!(visited, bb) + push!(worklist, bb) + end + end + while !isempty(worklist) + foreach(visit!, cfg.blocks[pop!(worklist)].succs) + end + return visited +end + +function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree) + dead_blocks = compute_dead_blocks(isrequired, src, cfg, postdomtree) + changed = false + for bbidx = 1:length(cfg.blocks) + if bbidx ∉ dead_blocks + bb = cfg.blocks[bbidx] + nsuccs = length(bb.succs) + if nsuccs == 1 + termidx = bb.stmts[end] + if src.code[termidx] isa GotoNode + changed |= isrequired[termidx] = true end - # Walk down the post-dominators, including self - jbb = ibb - while jbb != 0 && jbb < nblocks - pdbb = blocks[jbb] - # Check if the exit of this block is a GotoNode or `return` - if length(pdbb.succs) < 2 - idxlast = rng(pdbb)[end] - _changed |= !isrequired[idxlast] - isrequired[idxlast] = true + end + end + end + return changed +end + +# find dead blocks using the same approach as `add_control_flow!`, for the converged `isrequired` 
+function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree) + dead_blocks = BitSet() + for bbidx = 1:length(cfg.blocks) + bb = cfg.blocks[bbidx] + nsuccs = length(bb.succs) + if nsuccs == 2 + termidx = bb.stmts[end] + @assert is_conditional_terminator(src.code[termidx]) "invalid IR" + visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet + is_𝑰𝑵𝑭𝑳_active = false + for blk in 𝑰𝑵𝑭𝑳 + if blk == postdominator + continue # skip the post-dominator block and continue to a next infl block + end + if any(@view isrequired[cfg.blocks[blk].stmts]) + is_𝑰𝑵𝑭𝑳_active |= true + break end - jbb = postdomtree.idoms_bb[jbb] + end + if !is_𝑰𝑵𝑭𝑳_active + union!(dead_blocks, delete!(𝑰𝑵𝑭𝑳, postdominator)) end end end - changed |= _changed end - return changed + return dead_blocks end # Do a traveral of "numbered" predecessors and find statement ranges and names of type definitions diff --git a/src/domtree.jl b/src/domtree.jl index 0693b46..90513e4 100644 --- a/src/domtree.jl +++ b/src/domtree.jl @@ -412,3 +412,30 @@ function _dominates(domtree::GenericDomTree, bb1::BBNumber, bb2::BBNumber) end return bb1 == bb2 end + +""" + nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber) + +Compute the nearest common (post-)dominator of `a` and `b`. +""" +function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber) + a == 0 && return a + b == 0 && return b + alevel = domtree.nodes[a].level + blevel = domtree.nodes[b].level + # W.l.g. 
assume blevel <= alevel + if alevel < blevel + a, b = b, a + alevel, blevel = blevel, alevel + end + while alevel > blevel + a = domtree.idoms_bb[a] + alevel -= 1 + end + while a != b && a != 0 + a = domtree.idoms_bb[a] + b = domtree.idoms_bb[b] + end + @assert a == b + return a +end diff --git a/src/packagedef.jl b/src/packagedef.jl index 75833e8..31aac59 100644 --- a/src/packagedef.jl +++ b/src/packagedef.jl @@ -2,7 +2,8 @@ if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optle @eval Base.Experimental.@optlevel 1 end -using Core: SimpleVector, CodeInfo, NewvarNode, GotoNode +using Core: SimpleVector +using Core.IR using Base.Meta: isexpr const SSAValues = Union{Core.Compiler.SSAValue, JuliaInterpreter.SSAValue} @@ -22,6 +23,7 @@ else const construct_domtree = Core.Compiler.construct_domtree const construct_postdomtree = Core.Compiler.construct_postdomtree const postdominates = Core.Compiler.postdominates + const nearest_common_dominator = Core.Compiler.nearest_common_dominator end # precompilation From fb5896d2de5805f330c5d3478c61ce36da25bd34 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Tue, 10 Sep 2024 18:47:05 +0900 Subject: [PATCH 2/2] remove `add_loops!` --- src/codeedges.jl | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/src/codeedges.jl b/src/codeedges.jl index eb1cfd6..8a1ec0b 100644 --- a/src/codeedges.jl +++ b/src/codeedges.jl @@ -649,7 +649,6 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo, changed |= add_named_dependencies!(isrequired, edges, objs, norequire) # Add control-flow - changed |= add_loops!(isrequired, cfg) changed |= add_control_flow!(isrequired, src, cfg, postdomtree) # So far, everything is generic graph traversal. 
Now we add some domain-specific information @@ -731,30 +730,6 @@ end ## Add control-flow -# Mark loops that contain evaluated statements -function add_loops!(isrequired, cfg) - changed = false - for (ibb, bb) in enumerate(cfg.blocks) - needed = false - for ibbp in bb.preds - # Is there a backwards-pointing predecessor, and if so are there any required statements between the two? - ibbp > ibb || continue # not a loop-block predecessor - r, rp = rng(bb), rng(cfg.blocks[ibbp]) - r = first(r):first(rp)-1 - needed |= any(view(isrequired, r)) - end - if needed - # Mark the final statement of all predecessors - for ibbp in bb.preds - rp = rng(cfg.blocks[ibbp]) - changed |= !isrequired[last(rp)] - isrequired[last(rp)] = true - end - end - end - return changed -end - using Core: CodeInfo using Core.Compiler: CFG, BasicBlock, compute_basic_blocks