Skip to content

Commit 54e46ba

Browse files
[flang][openacc] Fix post_alloc declare function ordering (#69980)
The declare actions were introduced to capture semantics dealing with allocation of descriptor-based variable. However, the post_alloc action has an ordering error. It needs to update descriptor first before the mapping action of the data. The reason for this is that implicit attach must occur during mapping action - but updating the descriptor synchronizes it with the host copy (which would hold a host pointer).
1 parent 49893fb commit 54e46ba

File tree

2 files changed

+41
-31
lines changed

2 files changed

+41
-31
lines changed

flang/lib/Lower/OpenACC.cpp

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ static unsigned routineCounter = 0;
3737
static constexpr llvm::StringRef accRoutinePrefix = "acc_routine_";
3838
static constexpr llvm::StringRef accPrivateInitName = "acc.private.init";
3939
static constexpr llvm::StringRef accReductionInitName = "acc.reduction.init";
40+
static constexpr llvm::StringRef accFirDescriptorPostfix = "_desc";
4041

4142
static mlir::Location
4243
genOperandLocation(Fortran::lower::AbstractConverter &converter,
@@ -138,27 +139,31 @@ static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder,
138139
auto registerFuncOp = createDeclareFunc(
139140
modBuilder, builder, loc, registerFuncName.str(), {descTy}, {loc});
140141

142+
llvm::SmallVector<mlir::Value> bounds;
143+
std::stringstream asFortranDesc;
144+
asFortranDesc << asFortran.str() << accFirDescriptorPostfix.str();
145+
146+
// Updating descriptor must occur before the mapping of the data so that
147+
// attached data pointer is not overwritten.
148+
mlir::acc::UpdateDeviceOp updateDeviceOp =
149+
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
150+
builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
151+
/*structured=*/false, /*implicit=*/true,
152+
mlir::acc::DataClause::acc_update_device, descTy);
153+
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 0, 0, 1};
154+
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
155+
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
156+
141157
mlir::Value desc =
142158
builder.create<fir::LoadOp>(loc, registerFuncOp.getArgument(0));
143159
fir::BoxAddrOp boxAddrOp = builder.create<fir::BoxAddrOp>(loc, desc);
144160
addDeclareAttr(builder, boxAddrOp.getOperation(), clause);
145-
146-
llvm::SmallVector<mlir::Value> bounds;
147161
EntryOp entryOp = createDataEntryOp<EntryOp>(
148162
builder, loc, boxAddrOp.getResult(), asFortran, bounds,
149163
/*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType());
150164
builder.create<mlir::acc::DeclareEnterOp>(
151165
loc, mlir::ValueRange(entryOp.getAccPtr()));
152166

153-
asFortran << "_desc";
154-
mlir::acc::UpdateDeviceOp updateDeviceOp =
155-
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
156-
builder, loc, registerFuncOp.getArgument(0), asFortran, bounds,
157-
/*structured=*/false, /*implicit=*/true,
158-
mlir::acc::DataClause::acc_update_device, descTy);
159-
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 0, 0, 1};
160-
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
161-
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
162167
modBuilder.setInsertionPointAfter(registerFuncOp);
163168
builder.restoreInsertionPoint(crtInsPt);
164169
}
@@ -208,7 +213,7 @@ static void createDeclareDeallocFuncWithArg(
208213
auto postDeallocOp = createDeclareFunc(
209214
modBuilder, builder, loc, postDeallocFuncName.str(), {descTy}, {loc});
210215
loadOp = builder.create<fir::LoadOp>(loc, postDeallocOp.getArgument(0));
211-
asFortran << "_desc";
216+
asFortran << accFirDescriptorPostfix.str();
212217
mlir::acc::UpdateDeviceOp updateDeviceOp =
213218
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
214219
builder, loc, loadOp, asFortran, bounds,
@@ -2753,28 +2758,33 @@ static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder,
27532758

27542759
fir::AddrOfOp addrOp = builder.create<fir::AddrOfOp>(
27552760
loc, fir::ReferenceType::get(globalOp.getType()), globalOp.getSymbol());
2756-
auto loadOp = builder.create<fir::LoadOp>(loc, addrOp.getResult());
2757-
fir::BoxAddrOp boxAddrOp = builder.create<fir::BoxAddrOp>(loc, loadOp);
2758-
addDeclareAttr(builder, boxAddrOp.getOperation(), clause);
27592761

27602762
std::stringstream asFortran;
27612763
asFortran << Fortran::lower::mangle::demangleName(globalOp.getSymName());
2764+
std::stringstream asFortranDesc;
2765+
asFortranDesc << asFortran.str() << accFirDescriptorPostfix.str();
27622766
llvm::SmallVector<mlir::Value> bounds;
2763-
EntryOp entryOp = createDataEntryOp<EntryOp>(
2764-
builder, loc, boxAddrOp.getResult(), asFortran, bounds,
2765-
/*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType());
2766-
builder.create<mlir::acc::DeclareEnterOp>(
2767-
loc, mlir::ValueRange(entryOp.getAccPtr()));
27682767

2769-
asFortran << "_desc";
2768+
// Updating descriptor must occur before the mapping of the data so that
2769+
// attached data pointer is not overwritten.
27702770
mlir::acc::UpdateDeviceOp updateDeviceOp =
27712771
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
2772-
builder, loc, addrOp, asFortran, bounds,
2772+
builder, loc, addrOp, asFortranDesc, bounds,
27732773
/*structured=*/false, /*implicit=*/true,
27742774
mlir::acc::DataClause::acc_update_device, addrOp.getType());
27752775
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 0, 0, 1};
27762776
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
27772777
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
2778+
2779+
auto loadOp = builder.create<fir::LoadOp>(loc, addrOp.getResult());
2780+
fir::BoxAddrOp boxAddrOp = builder.create<fir::BoxAddrOp>(loc, loadOp);
2781+
addDeclareAttr(builder, boxAddrOp.getOperation(), clause);
2782+
EntryOp entryOp = createDataEntryOp<EntryOp>(
2783+
builder, loc, boxAddrOp.getResult(), asFortran, bounds,
2784+
/*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType());
2785+
builder.create<mlir::acc::DeclareEnterOp>(
2786+
loc, mlir::ValueRange(entryOp.getAccPtr()));
2787+
27782788
modBuilder.setInsertionPointAfter(registerFuncOp);
27792789
}
27802790

@@ -2832,7 +2842,7 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder,
28322842

28332843
addrOp = builder.create<fir::AddrOfOp>(
28342844
loc, fir::ReferenceType::get(globalOp.getType()), globalOp.getSymbol());
2835-
asFortran << "_desc";
2845+
asFortran << accFirDescriptorPostfix.str();
28362846
mlir::acc::UpdateDeviceOp updateDeviceOp =
28372847
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
28382848
builder, loc, addrOp, asFortran, bounds,

flang/test/Lower/OpenACC/acc-declare.f90

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -228,30 +228,30 @@ subroutine acc_declare_allocate()
228228

229229
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(
230230
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
231+
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a_desc", structured = false}
232+
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
231233
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
232234
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
233235
! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {name = "a", structured = false}
234236
! CHECK: acc.declare_enter dataOperands(%[[CREATE]] : !fir.heap<!fir.array<?xi32>>)
235-
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a_desc", structured = false}
236-
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
237237
! CHECK: return
238238
! CHECK: }
239239

240240
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_pre_dealloc(
241241
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
242242
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
243243
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
244-
! CHECK: %[[GETDEVICEPTR:.*]] = acc.getdeviceptr varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {dataClause = #acc<data_clause acc_create>, name = "a_desc", structured = false}
244+
! CHECK: %[[GETDEVICEPTR:.*]] = acc.getdeviceptr varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {dataClause = #acc<data_clause acc_create>, name = "a", structured = false}
245245
! CHECK: acc.declare_exit dataOperands(%[[GETDEVICEPTR]] : !fir.heap<!fir.array<?xi32>>)
246-
! CHECK: acc.delete accPtr(%[[GETDEVICEPTR]] : !fir.heap<!fir.array<?xi32>>) {dataClause = #acc<data_clause acc_create>, name = "a_desc", structured = false}
246+
! CHECK: acc.delete accPtr(%[[GETDEVICEPTR]] : !fir.heap<!fir.array<?xi32>>) {dataClause = #acc<data_clause acc_create>, name = "a", structured = false}
247247
! CHECK: return
248248
! CHECK: }
249249

250250
! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_dealloc(
251251
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
252252
! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
253253
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
254-
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {implicit = true, name = "a_desc_desc", structured = false}
254+
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[BOX_ADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {implicit = true, name = "a_desc", structured = false}
255255
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.heap<!fir.array<?xi32>>)
256256
! CHECK: return
257257
! CHECK: }
@@ -312,18 +312,18 @@ module acc_declare_allocatable_test
312312

313313
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_alloc() {
314314
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
315+
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
316+
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
315317
! CHECK: %[[LOAD:.*]] = fir.load %[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
316318
! CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
317319
! CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[BOXADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {name = "data1", structured = false}
318320
! CHECK: acc.declare_enter dataOperands(%[[CREATE]] : !fir.heap<!fir.array<?xi32>>)
319-
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1_desc", structured = false}
320-
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
321321
! CHECK: return
322322
! CHECK: }
323323

324324
! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_pre_dealloc() {
325325
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
326-
! CHECK: %[[LOAD]] = fir.load %[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
326+
! CHECK: %[[LOAD:.*]] = fir.load %[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
327327
! CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]] {acc.declare = #acc.declare<dataClause = acc_create>} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
328328
! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[BOXADDR]] : !fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> {dataClause = #acc<data_clause acc_create>, name = "data1", structured = false}
329329
! CHECK: acc.declare_exit dataOperands(%[[DEVICEPTR]] : !fir.heap<!fir.array<?xi32>>)

0 commit comments

Comments
 (0)