Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 31 additions & 22 deletions flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,32 +137,41 @@ class MapInfoFinalizationPass
!fir::factory::isOptionalArgument(descriptor.getDefiningOp()))
return descriptor;

mlir::Value &slot = localBoxAllocas[descriptor.getDefiningOp()];
if (slot) {
return slot;
mlir::Value &alloca = localBoxAllocas[descriptor.getDefiningOp()];
mlir::Location loc = boxMap->getLoc();

if (!alloca) {
// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
// allowing it to access non-reference box operations can cause some
// problematic SSA IR. However, in the case of assumed shape's the type
// is not a !fir.ref, in these cases to retrieve the appropriate
// !fir.ref<!fir.box<...>> to access the data we need to map we must
// perform an alloca and then store to it and retrieve the data from the
// new alloca.
mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
mlir::Block *allocaBlock = builder.getAllocaBlock();
assert(allocaBlock && "No alloca block found for this top level op");
builder.setInsertionPointToStart(allocaBlock);

mlir::Type allocaType = descriptor.getType();
if (fir::isBoxAddress(allocaType))
allocaType = fir::unwrapRefType(allocaType);
alloca = fir::AllocaOp::create(builder, loc, allocaType);
builder.restoreInsertionPoint(insPt);
}

// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
// allowing it to access non-reference box operations can cause some
// problematic SSA IR. However, in the case of assumed shape's the type
// is not a !fir.ref, in these cases to retrieve the appropriate
// !fir.ref<!fir.box<...>> to access the data we need to map we must
// perform an alloca and then store to it and retrieve the data from the new
// alloca.
mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
mlir::Block *allocaBlock = builder.getAllocaBlock();
mlir::Location loc = boxMap->getLoc();
assert(allocaBlock && "No alloca block found for this top level op");
builder.setInsertionPointToStart(allocaBlock);

mlir::Type allocaType = descriptor.getType();
if (fir::isBoxAddress(allocaType))
allocaType = fir::unwrapRefType(allocaType);
auto alloca = fir::AllocaOp::create(builder, loc, allocaType);
builder.restoreInsertionPoint(insPt);
// We should only emit a store if the passed in data is present, it is
// possible a user passes in no argument to an optional parameter, in which
// case we cannot store or we'll segfault on the emitted memcpy.
// TODO: We currently emit a present -> load/store every time we use a
// mapped value that requires a local allocation, this isn't the most
// efficient, although, it is more correct in a lot of situations. One
// such situation is emitting a this series of instructions in separate
// segments of a branch (e.g. two target regions in separate else/if branch
// mapping the same function argument), however, it would be nice to be able
// to optimize these situations e.g. raising the load/store out of the
// branch if possible. But perhaps this is best left to lower level
// optimisation passes.
auto isPresent =
fir::IsPresentOp::create(builder, loc, builder.getI1Type(), descriptor);
builder.genIfOp(loc, {}, isPresent, false)
Expand All @@ -171,7 +180,7 @@ class MapInfoFinalizationPass
fir::StoreOp::create(builder, loc, descriptor, alloca);
})
.end();
return slot = alloca;
return alloca;
}

/// Function that generates a FIR operation accessing the descriptor's
Expand Down
46 changes: 46 additions & 0 deletions flang/test/Lower/OpenMP/optional-argument-map-3.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s

module mod
contains
subroutine foo(dt, switch)
implicit none
real(4), dimension(:), intent(inout) :: dt
logical, intent(in) :: switch
integer :: dim, idx

if (switch) then
!$omp target teams distribute parallel do
do idx = 1, 100
dt(idx) = 20
end do
else
!$omp target teams distribute parallel do
do idx = 1, 100
dt(idx) = 30
end do
end if
end subroutine foo
end module
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use some more consistent formatting for the test code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do, I now have the power of fprettify at my finger tips... :-)


! CHECK-LABEL: func.func @{{.*}}(
! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "dt"},
! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope {{.*}}
! CHECK: fir.if %{{.*}} {
! CHECK: %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box<!fir.array<?xf32>>) -> i1
! CHECK: fir.if %[[VAL_2]] {
! CHECK: fir.store %[[VAL_1]]#1 to %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
! CHECK: }
! CHECK: %[[VAL_3:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>
! CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_3]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>> {name = ""}
! CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_4]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) -> !fir.ref<!fir.array<?xf32>> {name = "dt"}
! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}%[[VAL_5]] -> {{.*}}, %[[VAL_4]] -> {{.*}} : {{.*}}) {
! CHECK: } else {
! CHECK: %[[VAL_6:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box<!fir.array<?xf32>>) -> i1
! CHECK: fir.if %[[VAL_6]] {
! CHECK: fir.store %[[VAL_1]]#1 to %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
! CHECK: }
! CHECK: %[[VAL_7:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>
! CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_7]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>> {name = ""}
! CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_8]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) -> !fir.ref<!fir.array<?xf32>> {name = "dt"}
! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[VAL_9]] ->{{.*}}, %[[VAL_8]] -> {{.*}} : {{.*}}) {
45 changes: 45 additions & 0 deletions offload/test/offloading/fortran/optional-mapped-arguments-3.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
! OpenMP offloading test that checks we do not cause a segfault when mapping
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this test rather go into the llvm test repository? That appears to me to be a runtime functionality test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything in this directory is an end to end runtime tests (executed via check-offload if you have the right pre-requisites), they're just OpenMP Fortran specific :-) There's CUDA and C++ OpenMP in here as well a directory or so above (there also used to be host specific runtime tests, but since this is the offload project now they're likely still in the OpenMP subdirectory). It could also be added to the llvm test repository, but there's already plenty of precedent for adding these tests in this (and parent/sibling) directories! I'm also not so sure the llvm test repository is setup/configured to run offload tests!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, OK. I was under the impression that runtime tests should not be part of the llvm-project tests. I'll take it back in that case. :-)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a really unusual case I think and perhaps an unfortunate one as I think a few of the wider range of OpenMP/offload tests have a tendency to hang (depending on device/environment/feeling of the GPU/CPU on that particular day) makes it hard to get the tests ran in the buildbots that ping people as they need to consistently be green enough :-)

! optional function arguments (present or otherwise). No results requiring
! checking other than that the program compiles and runs to completion with no
! error. This particular variation checks that we're correctly emitting the
! load/store in both branches mapping the input array.
! REQUIRES: flang, amdgpu

! RUN: %libomptarget-compile-fortran-run-and-check-generic
module reproducer_mod
contains
subroutine branching_target_call(dt, switch)
implicit none
real(4), dimension(:), intent(inout) :: dt
logical, intent(in) :: switch
integer :: dim, idx

dim = size(dt)
if (switch) then
!$omp target teams distribute parallel do
do idx = 1, dim
dt(idx) = 20
end do
else
!$omp target teams distribute parallel do
do idx = 1, dim
dt(idx) = 30
end do
end if
end subroutine branching_target_call
end module reproducer_mod

program reproducer
use reproducer_mod
implicit none
real(4), dimension(:), allocatable :: dt
integer :: n = 21312
integer :: i

allocate (dt(n))
call branching_target_call(dt, .FALSE.)
call branching_target_call(dt, .TRUE.)
print *, "PASSED"
end program reproducer

! CHECK: PASSED