diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 03ca3d800f6b9..57be863cfa1b8 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -137,32 +137,41 @@ class MapInfoFinalizationPass !fir::factory::isOptionalArgument(descriptor.getDefiningOp())) return descriptor; - mlir::Value &slot = localBoxAllocas[descriptor.getDefiningOp()]; - if (slot) { - return slot; + mlir::Value &alloca = localBoxAllocas[descriptor.getDefiningOp()]; + mlir::Location loc = boxMap->getLoc(); + + if (!alloca) { + // The fir::BoxOffsetOp only works with !fir.ref> types, as + // allowing it to access non-reference box operations can cause some + // problematic SSA IR. However, in the case of assumed shape's the type + // is not a !fir.ref, in these cases to retrieve the appropriate + // !fir.ref> to access the data we need to map we must + // perform an alloca and then store to it and retrieve the data from the + // new alloca. + mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint(); + mlir::Block *allocaBlock = builder.getAllocaBlock(); + assert(allocaBlock && "No alloca block found for this top level op"); + builder.setInsertionPointToStart(allocaBlock); + + mlir::Type allocaType = descriptor.getType(); + if (fir::isBoxAddress(allocaType)) + allocaType = fir::unwrapRefType(allocaType); + alloca = fir::AllocaOp::create(builder, loc, allocaType); + builder.restoreInsertionPoint(insPt); } - // The fir::BoxOffsetOp only works with !fir.ref> types, as - // allowing it to access non-reference box operations can cause some - // problematic SSA IR. However, in the case of assumed shape's the type - // is not a !fir.ref, in these cases to retrieve the appropriate - // !fir.ref> to access the data we need to map we must - // perform an alloca and then store to it and retrieve the data from the new - // alloca. - mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint(); - mlir::Block *allocaBlock = builder.getAllocaBlock(); - mlir::Location loc = boxMap->getLoc(); - assert(allocaBlock && "No alloca block found for this top level op"); - builder.setInsertionPointToStart(allocaBlock); - - mlir::Type allocaType = descriptor.getType(); - if (fir::isBoxAddress(allocaType)) - allocaType = fir::unwrapRefType(allocaType); - auto alloca = fir::AllocaOp::create(builder, loc, allocaType); - builder.restoreInsertionPoint(insPt); // We should only emit a store if the passed in data is present, it is // possible a user passes in no argument to an optional parameter, in which // case we cannot store or we'll segfault on the emitted memcpy. + // TODO: We currently emit a present -> load/store every time we use a + // mapped value that requires a local allocation, this isn't the most + // efficient, although, it is more correct in a lot of situations. One + // such situation is emitting a this series of instructions in separate + // segments of a branch (e.g. two target regions in separate else/if branch + // mapping the same function argument), however, it would be nice to be able + // to optimize these situations e.g. raising the load/store out of the + // branch if possible. But perhaps this is best left to lower level + // optimisation passes. auto isPresent = fir::IsPresentOp::create(builder, loc, builder.getI1Type(), descriptor); builder.genIfOp(loc, {}, isPresent, false) @@ -171,7 +180,7 @@ class MapInfoFinalizationPass fir::StoreOp::create(builder, loc, descriptor, alloca); }) .end(); - return slot = alloca; + return alloca; } /// Function that generates a FIR operation accessing the descriptor's diff --git a/flang/test/Lower/OpenMP/optional-argument-map-3.f90 b/flang/test/Lower/OpenMP/optional-argument-map-3.f90 new file mode 100644 index 0000000000000..7e2a24e31123e --- /dev/null +++ b/flang/test/Lower/OpenMP/optional-argument-map-3.f90 @@ -0,0 +1,46 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +module mod +contains + subroutine foo(dt, switch) + implicit none + real(4), dimension(:), intent(inout) :: dt + logical, intent(in) :: switch + integer :: dim, idx + + if (switch) then +!$omp target teams distribute parallel do + do idx = 1, 100 + dt(idx) = 20 + end do + else +!$omp target teams distribute parallel do + do idx = 1, 100 + dt(idx) = 30 + end do + end if + end subroutine foo +end module + +! CHECK-LABEL: func.func @{{.*}}( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "dt"}, +! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box> +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope {{.*}} +! CHECK: fir.if %{{.*}} { +! CHECK: %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box>) -> i1 +! CHECK: fir.if %[[VAL_2]] { +! CHECK: fir.store %[[VAL_1]]#1 to %[[VAL_0]] : !fir.ref>> +! CHECK: } +! CHECK: %[[VAL_3:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref>>) -> !fir.llvm_ptr>> +! CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_3]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} +! CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>>, !fir.box>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_4]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = "dt"} +! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}%[[VAL_5]] -> {{.*}}, %[[VAL_4]] -> {{.*}} : {{.*}}) { +! CHECK: } else { +! CHECK: %[[VAL_6:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box>) -> i1 +! CHECK: fir.if %[[VAL_6]] { +! CHECK: fir.store %[[VAL_1]]#1 to %[[VAL_0]] : !fir.ref>> +! CHECK: } +! CHECK: %[[VAL_7:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref>>) -> !fir.llvm_ptr>> +! CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_7]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} +! CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>>, !fir.box>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_8]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = "dt"} +! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[VAL_9]] ->{{.*}}, %[[VAL_8]] -> {{.*}} : {{.*}}) { diff --git a/offload/test/offloading/fortran/optional-mapped-arguments-3.f90 b/offload/test/offloading/fortran/optional-mapped-arguments-3.f90 new file mode 100644 index 0000000000000..cf82a854b5474 --- /dev/null +++ b/offload/test/offloading/fortran/optional-mapped-arguments-3.f90 @@ -0,0 +1,45 @@ +! OpenMP offloading test that checks we do not cause a segfault when mapping +! optional function arguments (present or otherwise). No results requiring +! checking other than that the program compiles and runs to completion with no +! error. This particular variation checks that we're correctly emitting the +! load/store in both branches mapping the input array. +! REQUIRES: flang, amdgpu + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +module reproducer_mod +contains + subroutine branching_target_call(dt, switch) + implicit none + real(4), dimension(:), intent(inout) :: dt + logical, intent(in) :: switch + integer :: dim, idx + + dim = size(dt) + if (switch) then +!$omp target teams distribute parallel do + do idx = 1, dim + dt(idx) = 20 + end do + else +!$omp target teams distribute parallel do + do idx = 1, dim + dt(idx) = 30 + end do + end if + end subroutine branching_target_call +end module reproducer_mod + +program reproducer + use reproducer_mod + implicit none + real(4), dimension(:), allocatable :: dt + integer :: n = 21312 + integer :: i + + allocate (dt(n)) + call branching_target_call(dt, .FALSE.) + call branching_target_call(dt, .TRUE.) + print *, "PASSED" +end program reproducer + +! CHECK: PASSED