Skip to content

Commit c6c300f

Browse files
committed
[MLIR][NVVM] Update mbarrier Ops to use AnyTypeOf[] (2/n)
This is a follow up of PR #165558. (1/n) This patch updates the below mbarrier Ops to use AnyTypeOf[] construct: * mbarrier.arrive * mbarrier.arrive.noComplete * mbarrier.test.wait * cp.async.mbarrier.arrive * Updated existing tests accordingly. * Verified locally that there are no new regressions in the `integration` tests. * TODO: A few more Ops are remaining and will be migrated in a subsequent PR. Signed-off-by: Durgadoss R <[email protected]>
1 parent 225f4c6 commit c6c300f

File tree

9 files changed

+152
-137
lines changed

9 files changed

+152
-137
lines changed

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3333,8 +3333,7 @@ IntrinsicLibrary::genBarrierArrive(mlir::Type resultType,
33333333
assert(args.size() == 1);
33343334
mlir::Value barrier = convertPtrToNVVMSpace(
33353335
builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared);
3336-
return mlir::NVVM::MBarrierArriveSharedOp::create(builder, loc, resultType,
3337-
barrier)
3336+
return mlir::NVVM::MBarrierArriveOp::create(builder, loc, resultType, barrier)
33383337
.getResult();
33393338
}
33403339

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ end subroutine
436436

437437
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
438438
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
439-
! CHECK: %{{.*}} = nvvm.mbarrier.arrive.shared %[[SHARED_PTR]] : !llvm.ptr<3> -> i64
439+
! CHECK: %{{.*}} = nvvm.mbarrier.arrive %[[SHARED_PTR]] : !llvm.ptr<3> -> i64
440440

441441
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
442442
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 56 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -656,8 +656,8 @@ def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">,
656656
}
657657

658658
def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
659-
Results<(outs LLVM_Type:$res)>,
660-
Arguments<(ins LLVM_AnyPointer:$addr)> {
659+
Results<(outs I64:$res)>,
660+
Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr)> {
661661
let summary = "MBarrier Arrive Operation";
662662
let description = [{
663663
The `nvvm.mbarrier.arrive` operation performs an arrive-on operation on the
@@ -674,36 +674,32 @@ def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
674674
value are implementation-specific.
675675

676676
The operation takes the following operand:
677-
- `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
678-
addressing, but the address must still be in the shared memory space.
677+
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
678+
must be a pointer to generic or shared::cta memory. When it is generic, the
679+
underlying address must be within the shared::cta memory space; otherwise
680+
the behavior is undefined.
679681

680682
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
681683
}];
682-
string llvmBuilder = [{
683-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive, {$addr});
684-
}];
685684
let assemblyFormat = "$addr attr-dict `:` type($addr) `->` type($res)";
686-
}
687685

688-
def NVVM_MBarrierArriveSharedOp : NVVM_Op<"mbarrier.arrive.shared">,
689-
Results<(outs LLVM_Type:$res)>,
690-
Arguments<(ins LLVM_PointerShared:$addr)> {
691-
let summary = "Shared MBarrier Arrive Operation";
692-
let description = [{
693-
This Op is the same as `nvvm.mbarrier.arrive` except that the *mbarrier object*
694-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
695-
696-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
686+
let extraClassDeclaration = [{
687+
static mlir::NVVM::IDArgPair
688+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
689+
llvm::IRBuilderBase& builder);
697690
}];
691+
698692
string llvmBuilder = [{
699-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_shared, {$addr});
693+
auto [id, args] = NVVM::MBarrierArriveOp::getIntrinsicIDAndArgs(
694+
*op, moduleTranslation, builder);
695+
$res = createIntrinsicCall(builder, id, args);
700696
}];
701-
let assemblyFormat = "$addr attr-dict `:` qualified(type($addr)) `->` type($res)";
702697
}
703698

704699
def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
705-
Results<(outs LLVM_Type:$res)>,
706-
Arguments<(ins LLVM_AnyPointer:$addr, I32:$count)> {
700+
Results<(outs I64:$res)>,
701+
Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
702+
I32:$count)> {
707703
let summary = "MBarrier Arrive No-Complete Operation";
708704
let description = [{
709705
The `nvvm.mbarrier.arrive.nocomplete` operation performs an arrive-on operation
@@ -721,33 +717,29 @@ def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
721717
captures the phase of the *mbarrier object* prior to the arrive-on operation.
722718

723719
The operation takes the following operands:
724-
- `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
725-
addressing, but the address must still be in the shared memory space.
720+
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
721+
must be a pointer to generic or shared::cta memory. When it is generic, the
722+
underlying address must be within the shared::cta memory space; otherwise
723+
the behavior is undefined.
726724
- `count`: Integer specifying the count argument to the arrive-on operation.
727725
Must be in the valid range as specified in the *mbarrier object* contents.
728726

729727
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
730728
}];
731-
string llvmBuilder = [{
732-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete, {$addr, $count});
733-
}];
734-
let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
735-
}
736729

737-
def NVVM_MBarrierArriveNocompleteSharedOp : NVVM_Op<"mbarrier.arrive.nocomplete.shared">,
738-
Results<(outs LLVM_Type:$res)>,
739-
Arguments<(ins LLVM_PointerShared:$addr, I32:$count)> {
740-
let summary = "Shared MBarrier Arrive No-Complete Operation";
741-
let description = [{
742-
This Op is the same as `nvvm.mbarrier.arrive.nocomplete` except that the *mbarrier object*
743-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
730+
let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
744731

745-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
732+
let extraClassDeclaration = [{
733+
static mlir::NVVM::IDArgPair
734+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
735+
llvm::IRBuilderBase& builder);
746736
}];
737+
747738
string llvmBuilder = [{
748-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared, {$addr, $count});
739+
auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
740+
*op, moduleTranslation, builder);
741+
$res = createIntrinsicCall(builder, id, args);
749742
}];
750-
let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
751743
}
752744

753745
def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">,
@@ -896,8 +888,9 @@ def NVVM_MBarrierTryWaitParitySharedOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.p
896888
}
897889

898890
def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
899-
Results<(outs LLVM_Type:$res)>,
900-
Arguments<(ins LLVM_AnyPointer:$addr, LLVM_Type:$state)> {
891+
Results<(outs I1:$res)>,
892+
Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
893+
I64:$state)> {
901894
let summary = "MBarrier Non-Blocking Test Wait Operation";
902895
let description = [{
903896
The `nvvm.mbarrier.test.wait` operation performs a non-blocking test for the
@@ -944,26 +937,20 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
944937

945938
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
946939
}];
947-
string llvmBuilder = [{
948-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait, {$addr, $state});
949-
}];
950-
let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
951-
}
952940

953-
def NVVM_MBarrierTestWaitSharedOp : NVVM_Op<"mbarrier.test.wait.shared">,
954-
Results<(outs LLVM_Type:$res)>,
955-
Arguments<(ins LLVM_PointerShared:$addr, LLVM_Type:$state)> {
956-
let summary = "Shared MBarrier Non-Blocking Test Wait Operation";
957-
let description = [{
958-
This Op is the same as `nvvm.mbarrier.test.wait` except that the *mbarrier object*
959-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
941+
let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
960942

961-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
943+
let extraClassDeclaration = [{
944+
static mlir::NVVM::IDArgPair
945+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
946+
llvm::IRBuilderBase& builder);
962947
}];
948+
963949
string llvmBuilder = [{
964-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait_shared, {$addr, $state});
950+
auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
951+
*op, moduleTranslation, builder);
952+
$res = createIntrinsicCall(builder, id, args);
965953
}];
966-
let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
967954
}
968955

969956
//===----------------------------------------------------------------------===//
@@ -1534,47 +1521,30 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> {
15341521
The `cp.async.mbarrier.arrive` Op makes the *mbarrier object* track
15351522
all prior cp.async operations initiated by the executing thread.
15361523
The `addr` operand specifies the address of the *mbarrier object*
1537-
in generic address space. The `noinc` attr impacts how the
1538-
mbarrier's state is updated.
1524+
in generic or shared::cta address space. When it is generic, the
1525+
underlying memory should fall within the shared::cta space;
1526+
otherwise the behavior is undefined. The `noinc` attr impacts
1527+
how the mbarrier's state is updated.
15391528

15401529
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
15411530
}];
1542-
let assemblyFormat = "$addr attr-dict `:` type(operands)";
15431531

15441532
let arguments = (ins
1545-
LLVM_AnyPointer:$addr, DefaultValuedAttr<I1Attr, "0">:$noinc);
1533+
AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
1534+
DefaultValuedAttr<I1Attr, "0">:$noinc);
15461535

1547-
string llvmBuilder = [{
1548-
auto intId = $noinc ?
1549-
llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc :
1550-
llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive;
1551-
1552-
createIntrinsicCall(builder, intId, {$addr});
1553-
}];
1554-
}
1555-
1556-
def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared"> {
1557-
let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive.shared";
1558-
let description = [{
1559-
The `cp.async.mbarrier.arrive.shared` Op makes the *mbarrier object*
1560-
track all prior cp.async operations initiated by the executing thread.
1561-
The `addr` operand specifies the address of the *mbarrier object* in
1562-
shared memory. The `noinc` attr impacts how the mbarrier's state
1563-
is updated.
1564-
1565-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
1566-
}];
15671536
let assemblyFormat = "$addr attr-dict `:` type(operands)";
15681537

1569-
let arguments = (ins
1570-
LLVM_PointerShared:$addr, DefaultValuedAttr<I1Attr, "0">:$noinc);
1538+
let extraClassDeclaration = [{
1539+
static mlir::NVVM::IDArgPair
1540+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
1541+
llvm::IRBuilderBase& builder);
1542+
}];
15711543

15721544
string llvmBuilder = [{
1573-
auto intId = $noinc ?
1574-
llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared :
1575-
llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared;
1576-
1577-
createIntrinsicCall(builder, intId, {$addr});
1545+
auto [id, args] = NVVM::CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
1546+
*op, moduleTranslation, builder);
1547+
createIntrinsicCall(builder, id, args);
15781548
}];
15791549
}
15801550

mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -865,13 +865,7 @@ struct NVGPUMBarrierArriveLowering
865865
adaptor.getMbarId(), rewriter);
866866
Type tokenType = getTypeConverter()->convertType(
867867
nvgpu::MBarrierTokenType::get(op->getContext()));
868-
if (isMbarrierShared(op.getBarriers().getType())) {
869-
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveSharedOp>(op, tokenType,
870-
barrier);
871-
} else {
872-
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveOp>(op, tokenType,
873-
barrier);
874-
}
868+
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveOp>(op, tokenType, barrier);
875869
return success();
876870
}
877871
};
@@ -892,13 +886,8 @@ struct NVGPUMBarrierArriveNoCompleteLowering
892886
Type tokenType = getTypeConverter()->convertType(
893887
nvgpu::MBarrierTokenType::get(op->getContext()));
894888
Value count = truncToI32(b, adaptor.getCount());
895-
if (isMbarrierShared(op.getBarriers().getType())) {
896-
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteSharedOp>(
897-
op, tokenType, barrier, count);
898-
} else {
899-
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteOp>(
900-
op, tokenType, barrier, count);
901-
}
889+
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteOp>(
890+
op, tokenType, barrier, count);
902891
return success();
903892
}
904893
};
@@ -915,13 +904,8 @@ struct NVGPUMBarrierTestWaitLowering
915904
getMbarrierPtr(b, op.getBarriers().getType(), adaptor.getBarriers(),
916905
adaptor.getMbarId(), rewriter);
917906
Type retType = rewriter.getI1Type();
918-
if (isMbarrierShared(op.getBarriers().getType())) {
919-
rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitSharedOp>(
920-
op, retType, barrier, adaptor.getToken());
921-
} else {
922-
rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitOp>(
923-
op, retType, barrier, adaptor.getToken());
924-
}
907+
rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitOp>(op, retType, barrier,
908+
adaptor.getToken());
925909
return success();
926910
}
927911
};

mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1637,15 +1637,21 @@ std::string NVVM::MBarrierInitOp::getPtx() {
16371637
// getIntrinsicID/getIntrinsicIDAndArgs methods
16381638
//===----------------------------------------------------------------------===//
16391639

1640+
static bool isPtrInAddrSpace(mlir::Value ptr, NVVMMemorySpace targetAS) {
1641+
auto ptrTy = llvm::cast<LLVM::LLVMPointerType>(ptr.getType());
1642+
return ptrTy.getAddressSpace() == static_cast<unsigned>(targetAS);
1643+
}
1644+
1645+
static bool isPtrInSharedCTASpace(mlir::Value ptr) {
1646+
return isPtrInAddrSpace(ptr, NVVMMemorySpace::Shared);
1647+
}
1648+
16401649
mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
16411650
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
16421651
auto thisOp = cast<NVVM::MBarrierInitOp>(op);
1643-
unsigned addressSpace =
1644-
llvm::cast<LLVM::LLVMPointerType>(thisOp.getAddr().getType())
1645-
.getAddressSpace();
1646-
llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared)
1647-
? llvm::Intrinsic::nvvm_mbarrier_init_shared
1648-
: llvm::Intrinsic::nvvm_mbarrier_init;
1652+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1653+
llvm::Intrinsic::ID id = isShared ? llvm::Intrinsic::nvvm_mbarrier_init_shared
1654+
: llvm::Intrinsic::nvvm_mbarrier_init;
16491655

16501656
// Fill the Intrinsic Args
16511657
llvm::SmallVector<llvm::Value *> args;
@@ -1658,16 +1664,72 @@ mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
16581664
mlir::NVVM::IDArgPair MBarrierInvalOp::getIntrinsicIDAndArgs(
16591665
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
16601666
auto thisOp = cast<NVVM::MBarrierInvalOp>(op);
1661-
unsigned addressSpace =
1662-
llvm::cast<LLVM::LLVMPointerType>(thisOp.getAddr().getType())
1663-
.getAddressSpace();
1664-
llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared)
1667+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1668+
llvm::Intrinsic::ID id = isShared
16651669
? llvm::Intrinsic::nvvm_mbarrier_inval_shared
16661670
: llvm::Intrinsic::nvvm_mbarrier_inval;
16671671

16681672
return {id, {mt.lookupValue(thisOp.getAddr())}};
16691673
}
16701674

1675+
mlir::NVVM::IDArgPair MBarrierArriveOp::getIntrinsicIDAndArgs(
1676+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
1677+
auto thisOp = cast<NVVM::MBarrierArriveOp>(op);
1678+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1679+
llvm::Intrinsic::ID id = isShared
1680+
? llvm::Intrinsic::nvvm_mbarrier_arrive_shared
1681+
: llvm::Intrinsic::nvvm_mbarrier_arrive;
1682+
1683+
return {id, {mt.lookupValue(thisOp.getAddr())}};
1684+
}
1685+
1686+
mlir::NVVM::IDArgPair MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
1687+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
1688+
auto thisOp = cast<NVVM::MBarrierArriveNocompleteOp>(op);
1689+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1690+
llvm::Intrinsic::ID id =
1691+
isShared ? llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared
1692+
: llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete;
1693+
// Fill the Intrinsic Args
1694+
llvm::SmallVector<llvm::Value *> args;
1695+
args.push_back(mt.lookupValue(thisOp.getAddr()));
1696+
args.push_back(mt.lookupValue(thisOp.getCount()));
1697+
1698+
return {id, std::move(args)};
1699+
}
1700+
1701+
mlir::NVVM::IDArgPair MBarrierTestWaitOp::getIntrinsicIDAndArgs(
1702+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
1703+
auto thisOp = cast<NVVM::MBarrierTestWaitOp>(op);
1704+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1705+
llvm::Intrinsic::ID id = isShared
1706+
? llvm::Intrinsic::nvvm_mbarrier_test_wait_shared
1707+
: llvm::Intrinsic::nvvm_mbarrier_test_wait;
1708+
// Fill the Intrinsic Args
1709+
llvm::SmallVector<llvm::Value *> args;
1710+
args.push_back(mt.lookupValue(thisOp.getAddr()));
1711+
args.push_back(mt.lookupValue(thisOp.getState()));
1712+
1713+
return {id, std::move(args)};
1714+
}
1715+
1716+
mlir::NVVM::IDArgPair CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
1717+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
1718+
auto thisOp = cast<NVVM::CpAsyncMBarrierArriveOp>(op);
1719+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1720+
1721+
llvm::Intrinsic::ID id;
1722+
if (thisOp.getNoinc()) {
1723+
id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared
1724+
: llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc;
1725+
} else {
1726+
id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared
1727+
: llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive;
1728+
}
1729+
1730+
return {id, {mt.lookupValue(thisOp.getAddr())}};
1731+
}
1732+
16711733
#define CP_ASYNC_ID_IMPL(mod, size, suffix) \
16721734
llvm::Intrinsic::nvvm_cp_async_##mod##_shared_global_##size##suffix
16731735

0 commit comments

Comments
 (0)