Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions flang/include/flang/Optimizer/OpenMP/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,15 @@ def GenericLoopConversionPass
];
}

// Pass record for the AUTOMAP handling pass. It is anchored on
// `::mlir::ModuleOp`, is selected with the `omp-automap-to-target-data`
// command-line argument, and declares the OpenMP dialect as a dependent
// dialect.
def AutomapToTargetDataPass
: Pass<"omp-automap-to-target-data", "::mlir::ModuleOp"> {
let summary = "Insert OpenMP target data operations for AUTOMAP variables";
let description = [{
Inserts `omp.target_enter_data` and `omp.target_exit_data` operations to
map variables marked with the `AUTOMAP` modifier when their allocation
or deallocation is detected in the FIR.
}];
let dependentDialects = ["mlir::omp::OpenMPDialect"];
}

#endif //FORTRAN_OPTIMIZER_OPENMP_PASSES
34 changes: 34 additions & 0 deletions flang/include/flang/Support/OpenMP-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@
#ifndef FORTRAN_SUPPORT_OPENMP_UTILS_H_
#define FORTRAN_SUPPORT_OPENMP_UTILS_H_

#include "flang/Optimizer/Builder/DirectivesCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Semantics/symbol.h"

#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Value.h"

Expand Down Expand Up @@ -72,6 +77,35 @@ struct EntryBlockArgs {
/// \param [in] region - Empty region in which to create the entry block.
mlir::Block *genEntryBlock(
mlir::OpBuilder &builder, const EntryBlockArgs &args, mlir::Region &region);

/// Reports whether \p var refers to an entity of dynamic size, in which case
/// bounds operations are needed to describe its extents.
///
/// \param [in] var - value whose type must be an OpenMP pointer-like type
///                   (checked with an assertion).
inline bool needsBoundsOps(mlir::Value var) {
  assert(mlir::isa<mlir::omp::PointerLikeType>(var.getType()) &&
      "only pointer like types expected");
  const mlir::Type pointee = fir::unwrapRefType(var.getType());
  // If `dyn_cast_ptrOrBoxEleTy` yields an element type, query that type;
  // otherwise fall back to the unwrapped type itself.
  const mlir::Type element = fir::dyn_cast_ptrOrBoxEleTy(pointee);
  return fir::hasDynamicSize(element ? element : pointee);
}

/// Creates the implicit `mlir::omp::MapBoundsOp` operations for \p var and
/// appends their results to \p boundsOps.
///
/// \param [in]  builder   - builder used to create the operations.
/// \param [in]  var       - variable to describe; its location is used for
///                          every created operation.
/// \param [out] boundsOps - receives the generated bounds values; existing
///                          elements are preserved.
inline void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
    llvm::SmallVectorImpl<mlir::Value> &boundsOps) {
  const mlir::Location loc = var.getLoc();

  fir::factory::AddrAndBoundsInfo baseInfo =
      fir::factory::getDataOperandBaseAddr(
          builder, var, /*isOptional=*/false, loc);

  fir::ExtendedValue extended =
      hlfir::translateToExtendedValue(loc, builder,
          hlfir::Entity{baseInfo.addr}, /*contiguousHint=*/true)
          .first;

  llvm::SmallVector<mlir::Value> generated =
      fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
          mlir::omp::MapBoundsType>(
          builder, baseInfo, extended, /*dataExvIsAssumedSize=*/false, loc);

  boundsOps.append(generated.begin(), generated.end());
}
} // namespace Fortran::common::openmp

#endif // FORTRAN_SUPPORT_OPENMP_UTILS_H_
130 changes: 130 additions & 0 deletions flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
//===- AutomapToTargetData.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Support/OpenMP-utils.h"

#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Operation.h"
#include "mlir/Pass/Pass.h"

#include "llvm/Frontend/OpenMP/OMPConstants.h"

namespace flangomp {
#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
#include "flang/Optimizer/OpenMP/Passes.h.inc"
} // namespace flangomp

using namespace mlir;
using namespace Fortran::common::openmp;

namespace {
/// Module pass that inserts OpenMP target data operations for AUTOMAP
/// `declare target` globals.
///
/// For every `fir.global` that is declare-target with the automap modifier and
/// a device type other than `host`, the pass inspects the `hlfir.declare`
/// results derived from the global's `fir.address_of` operations and inserts:
///  - an `omp.map.info` (OMP_MAP_TO) plus `omp.target_enter_data` right after
///    each `fir.store` whose stored value is a `fir.embox` of a
///    `fir.allocmem` result, and
///  - an `omp.map.info` (OMP_MAP_DELETE) plus `omp.target_exit_data` right
///    after each `fir.load` whose result reaches a `fir.freemem` through a
///    `fir.box_addr`.
class AutomapToTargetDataPass
    : public flangomp::impl::AutomapToTargetDataPassBase<
          AutomapToTargetDataPass> {
  /// Returns true if \p stored is produced by a `fir.embox` whose first
  /// operand is defined by a `fir.allocmem`.
  static bool isEmboxOfAllocmem(Value stored) {
    auto emboxOp = stored.getDefiningOp<fir::EmboxOp>();
    return emboxOp && emboxOp.getOperand(0).getDefiningOp<fir::AllocMemOp>();
  }

  /// Returns true if the value loaded by \p loadOp is passed to a
  /// `fir.box_addr` whose result is in turn consumed by a `fir.freemem`.
  static bool feedsFreemem(fir::LoadOp loadOp) {
    for (Operation *loadUser : loadOp.getResult().getUsers())
      if (auto boxAddrOp = dyn_cast<fir::BoxAddrOp>(loadUser))
        for (Operation *boxAddrUser : boxAddrOp.getResult().getUsers())
          if (isa<fir::FreeMemOp>(boxAddrUser))
            return true;
    return false;
  }

  /// Collects the stores that publish a freshly allocated descriptor and the
  /// loads that precede a deallocation for the variable addressed by
  /// \p addressOfOp.
  ///
  /// Every `hlfir.declare` user of \p addressOfOp is considered; users of any
  /// other kind are ignored rather than asserted on, so an address with no
  /// users, several users, or non-declare users cannot crash the compiler.
  ///
  /// \param [in]  addressOfOp - `fir.address_of` referring to the global.
  /// \param [out] allocmems   - receives the matching `fir.store` operations.
  /// \param [out] freemems    - receives the matching `fir.load` operations.
  void findRelatedAllocmemFreemem(fir::AddrOfOp addressOfOp,
                                  llvm::DenseSet<fir::StoreOp> &allocmems,
                                  llvm::DenseSet<fir::LoadOp> &freemems) {
    for (Operation *addrUser : addressOfOp->getUsers()) {
      auto declareOp = dyn_cast<hlfir::DeclareOp>(addrUser);
      if (!declareOp)
        continue;

      // Only the first result of the declare is inspected.
      Value declaredRef = declareOp->getResult(0);
      for (Operation *refUser : declaredRef.getUsers()) {
        if (auto storeOp = dyn_cast<fir::StoreOp>(refUser)) {
          if (isEmboxOfAllocmem(storeOp.getValue()))
            allocmems.insert(storeOp);
        } else if (auto loadOp = dyn_cast<fir::LoadOp>(refUser)) {
          if (feedsFreemem(loadOp))
            freemems.insert(loadOp);
        }
      }
    }
  }

  void runOnOperation() override {
    // The pass is anchored on ModuleOp, so the operation it runs on is the
    // module to process. Looking up a parent module instead would inspect and
    // mutate IR outside the operation this pass was scheduled on.
    ModuleOp module = getOperation();

    // Build FIR builder for helper utilities.
    fir::KindMapping kindMap = fir::getKindMapping(module);
    fir::FirOpBuilder builder{module, std::move(kindMap)};

    // Collect global variables with AUTOMAP flag. The walk visits each global
    // once, so a vector is enough and keeps the processing order
    // deterministic.
    llvm::SmallVector<fir::GlobalOp> automapGlobals;
    module.walk([&](fir::GlobalOp globalOp) {
      auto iface =
          dyn_cast<omp::DeclareTargetInterface>(globalOp.getOperation());
      if (!iface)
        return;
      if (iface.isDeclareTarget() && iface.getDeclareTargetAutomap() &&
          iface.getDeclareTargetDeviceType() !=
              omp::DeclareTargetDeviceType::host)
        automapGlobals.push_back(globalOp);
    });

    // Emits the map info and the enter/exit data operation right after
    // `memOp`. A fir.store denotes an allocation (map "to"); anything else
    // (the fir.load preceding a freemem) denotes a deallocation (map
    // "delete").
    auto addMapInfo = [&](auto globalOp, auto memOp) {
      builder.setInsertionPointAfter(memOp);

      const bool isAllocation = isa<fir::StoreOp>(memOp);
      Value memref = memOp.getMemref();
      Location loc = memOp.getLoc();

      SmallVector<Value> bounds;
      if (needsBoundsOps(memref))
        genBoundsOps(builder, memref, bounds);

      const auto mapFlag =
          isAllocation ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO
                       : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE;

      omp::TargetEnterExitUpdateDataOperands clauses;
      mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create(
          builder, loc, memref.getType(), memref,
          TypeAttr::get(fir::unwrapRefType(memref.getType())),
          builder.getIntegerAttr(builder.getIntegerType(64, false),
                                 static_cast<unsigned>(mapFlag)),
          builder.getAttr<omp::VariableCaptureKindAttr>(
              omp::VariableCaptureKind::ByCopy),
          /*var_ptr_ptr=*/mlir::Value{},
          /*members=*/SmallVector<Value>{},
          /*members_index=*/ArrayAttr{}, bounds,
          /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
          builder.getBoolAttr(false));
      clauses.mapVars.push_back(mapInfo);

      if (isAllocation)
        omp::TargetEnterDataOp::create(builder, loc, clauses);
      else
        omp::TargetExitDataOp::create(builder, loc, clauses);
    };

    for (fir::GlobalOp globalOp : automapGlobals) {
      auto uses = globalOp.getSymbolUses(module.getOperation());
      if (!uses)
        continue;

      llvm::DenseSet<fir::StoreOp> allocmemStores;
      llvm::DenseSet<fir::LoadOp> freememLoads;
      for (const auto &use : *uses)
        if (auto addrOp = dyn_cast<fir::AddrOfOp>(use.getUser()))
          findRelatedAllocmemFreemem(addrOp, allocmemStores, freememLoads);

      // Each insertion is anchored on its own store/load, so the iteration
      // order of these sets does not influence the resulting IR.
      for (fir::StoreOp storeOp : allocmemStores)
        addMapInfo(globalOp, storeOp);

      for (fir::LoadOp loadOp : freememLoads)
        addMapInfo(globalOp, loadOp);
    }
  }
};
} // namespace
1 change: 1 addition & 0 deletions flang/lib/Optimizer/OpenMP/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)

add_flang_library(FlangOpenMPTransforms
AutomapToTargetData.cpp
DoConcurrentConversion.cpp
FunctionFiltering.cpp
GenericLoopConversion.cpp
Expand Down
35 changes: 2 additions & 33 deletions flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Support/OpenMP-utils.h"

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
Expand All @@ -47,6 +48,7 @@ namespace flangomp {
} // namespace flangomp

using namespace mlir;
using namespace Fortran::common::openmp;

namespace {
class MapsForPrivatizedSymbolsPass
Expand Down Expand Up @@ -193,38 +195,5 @@ class MapsForPrivatizedSymbolsPass
}
}
}
// Determines whether var refers to an entity of dynamic size. When it does,
// this pass has to extract the bounds from var's descriptor and attach them
// to the resulting MapInfoOp.
bool needsBoundsOps(mlir::Value var) {
  assert(mlir::isa<omp::PointerLikeType>(var.getType()) &&
         "needsBoundsOps can deal only with pointer types");
  const mlir::Type unwrapped = fir::unwrapRefType(var.getType());
  // The unwrapped type may be a box; if so, inspect what the box holds.
  const mlir::Type boxedOrPointee = fir::dyn_cast_ptrOrBoxEleTy(unwrapped);
  return fir::hasDynamicSize(boxedOrPointee ? boxedOrPointee : unwrapped);
}

// Generates the implicit omp::MapBoundsOp operations for var and appends the
// resulting values to boundsOps (existing elements are kept).
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
                  llvm::SmallVector<mlir::Value> &boundsOps) {
  const mlir::Location loc = var.getLoc();

  fir::factory::AddrAndBoundsInfo baseInfo =
      fir::factory::getDataOperandBaseAddr(builder, var,
                                           /*isOptional=*/false, loc);

  fir::ExtendedValue exv =
      hlfir::translateToExtendedValue(loc, builder,
                                      hlfir::Entity{baseInfo.addr},
                                      /*contiguousHint=*/true)
          .first;

  llvm::SmallVector<mlir::Value> generated =
      fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
                                         mlir::omp::MapBoundsType>(
          builder, baseInfo, exv,
          /*dataExvIsAssumedSize=*/false, loc);

  boundsOps.append(generated.begin(), generated.end());
}
};
} // namespace
12 changes: 6 additions & 6 deletions flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,13 +316,13 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
pm.addPass(flangomp::createDoConcurrentConversionPass(
opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device));

// The MapsForPrivatizedSymbols pass needs to run before
// MapInfoFinalizationPass because the former creates new
// MapInfoOp instances, typically for descriptors.
// MapInfoFinalizationPass adds MapInfoOp instances for the descriptors
// underlying data which is necessary to access the data on the offload
// target device.
// The MapsForPrivatizedSymbols and AutomapToTargetData passes need to run
// before MapInfoFinalizationPass because they create new MapInfoOp
// instances, typically for descriptors. MapInfoFinalizationPass adds
// MapInfoOp instances for the descriptors' underlying data, which is
// necessary to access the data on the offload target device.
pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass());
pm.addPass(flangomp::createAutomapToTargetDataPass());
pm.addPass(flangomp::createMapInfoFinalizationPass());
pm.addPass(flangomp::createMarkDeclareTargetPass());
pm.addPass(flangomp::createGenericLoopConversionPass());
Expand Down
58 changes: 58 additions & 0 deletions flang/test/Transforms/omp-automap-to-target-data.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// RUN: fir-opt --omp-automap-to-target-data %s | FileCheck %s
// Test OMP AutomapToTargetData pass.

// Input IR: one allocatable global carrying `automap = true` in its
// declare-target attribute, and a function that allocates and then frees it.
module {
// Declare-target global (device_type any, capture clause enter, automap).
fir.global
@_QMtestEarr{omp.declare_target = #omp.declaretarget<device_type = (any),
capture_clause = (enter), automap = true>} target
: !fir.box<!fir.heap<!fir.array<?xi32>>>

func.func @automap() {
%c0 = arith.constant 0 : index
%c10 = arith.constant 10 : i32
%addr = fir.address_of(@_QMtestEarr) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
%decl:2 = hlfir.declare %addr {fortran_attrs = #fir.var_attrs<allocatable, target>, uniq_name = "_QMtestEarr"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
// Extent: the constant 10 converted to index, clamped to be non-negative.
%idx = fir.convert %c10 : (i32) -> index
%cond = arith.cmpi sgt, %idx, %c0 : index
%n = arith.select %cond, %idx, %c0 : index
// Allocation: heap memory is emboxed and the box is stored through the
// first result of the declare.
%mem = fir.allocmem !fir.array<?xi32>, %n {fir.must_be_heap = true}
%shape = fir.shape %n : (index) -> !fir.shape<1>
%box = fir.embox %mem(%shape) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
fir.store %box to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
// Deallocation: the box is loaded, its base address extracted and freed.
%ld = fir.load %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
%base = fir.box_addr %ld : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
fir.freemem %base : !fir.heap<!fir.array<?xi32>>
// A box built from fir.zero_bits with a zero extent is stored back. This
// store does not embox a fir.allocmem result.
%undef = fir.zero_bits !fir.heap<!fir.array<?xi32>>
%sh0 = fir.shape %c0 : (index) -> !fir.shape<1>
%empty = fir.embox %undef(%sh0) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
fir.store %empty to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
return
}
}

// CHECK: fir.global @[[AUTOMAP:.*]] {{{.*}} automap = true
// CHECK-LABEL: func.func @automap()
// CHECK: %[[AUTOMAP_ADDR:.*]] = fir.address_of(@[[AUTOMAP]])
// CHECK: %[[AUTOMAP_DECL:.*]]:2 = hlfir.declare %[[AUTOMAP_ADDR]]
// CHECK: %[[ALLOC_MEM:.*]] = fir.allocmem
// CHECK-NEXT: fir.shape
// CHECK-NEXT: %[[ARR_BOXED:.*]] = fir.embox %[[ALLOC_MEM]]
// CHECK-NEXT: fir.store %[[ARR_BOXED]]
// CHECK-NEXT: %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0
// CHECK-NEXT: %[[ARR_HEAP_PTR:.*]] = fir.box_addr %[[ARR_BOXED_LOADED]]
// CHECK-NEXT: %[[DIM0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]]
// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]]
// CHECK-NEXT: %[[LOWER_BOUND:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index
// CHECK-NEXT: omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
// CHECK-NEXT: arith.muli %[[BOX_DIMS2]]#2, %[[BOX_DIMS2]]#1 : index
// CHECK-NEXT: %[[MAP_INFO:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(to) capture(ByCopy)
// CHECK-NEXT: omp.target_enter_data map_entries(%[[MAP_INFO]]
// CHECK: %[[LOAD:.*]] = fir.load %[[AUTOMAP_DECL]]#0
// CHECK: %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(delete) capture(ByCopy)
// CHECK-NEXT: omp.target_exit_data map_entries(%[[EXIT_MAP]]
// CHECK-NEXT: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]]
// CHECK-NEXT: fir.freemem %[[BOXADDR]]
36 changes: 36 additions & 0 deletions offload/test/offloading/fortran/declare-target-automap.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
! Offloading test for the AUTOMAP modifier in declare target enter.
! The program checks device presence of an allocatable array before it is
! allocated, after allocation, and after deallocation, and checks the values
! written on the device in between.
! REQUIRES: flang, amdgpu

! RUN: %libomptarget-compile-fortran-run-and-check-generic
program automap_program
  use iso_c_binding, only: c_loc
  use omp_lib, only: omp_get_default_device, omp_target_is_present
  integer, parameter :: N = 10
  integer :: i
  integer, allocatable, target :: automap_array(:)
  !$omp declare target enter(automap:automap_array)

  ! false since the storage is not present even though the descriptor is present
  write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
  ! CHECK: 0

  allocate (automap_array(N))
  ! true since the storage should be allocated and reference count incremented by the allocate
  write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
  ! CHECK: 1

  ! since storage is present this should not be a runtime error
  !$omp target teams loop
  do i = 1, N
    automap_array(i) = i
  end do

  !$omp target update from(automap_array)
  write (*, *) automap_array
  ! CHECK: 1 2 3 4 5 6 7 8 9 10

  deallocate (automap_array)

  ! automap_array should have its storage unmapped on device here
  write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
  ! CHECK: 0
end program