Skip to content

Commit febbd4c

Browse files
committed
[MLIR][NVVM] Update mbarrier Ops to use AnyTypeOf[] (2/n)
This is a follow up of PR #165558. (1/n) This patch updates the below mbarrier Ops to use AnyTypeOf[] construct: * mbarrier.arrive * mbarrier.arrive.noComplete * mbarrier.test.wait * cp.async.mbarrier.arrive * Updated existing tests accordingly. * Verified locally that there are no new regressions in the `integration` tests. * TODO: A few more Ops are remaining and will be migrated in a subsequent PR. Signed-off-by: Durgadoss R <[email protected]>
1 parent 7b3fe5f commit febbd4c

File tree

9 files changed

+152
-137
lines changed

9 files changed

+152
-137
lines changed

flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -891,8 +891,7 @@ CUDAIntrinsicLibrary::genBarrierArrive(mlir::Type resultType,
891891
assert(args.size() == 1);
892892
mlir::Value barrier = convertPtrToNVVMSpace(
893893
builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared);
894-
return mlir::NVVM::MBarrierArriveSharedOp::create(builder, loc, resultType,
895-
barrier)
894+
return mlir::NVVM::MBarrierArriveOp::create(builder, loc, resultType, barrier)
896895
.getResult();
897896
}
898897

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ end subroutine
440440

441441
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
442442
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
443-
! CHECK: %{{.*}} = nvvm.mbarrier.arrive.shared %[[SHARED_PTR]] : !llvm.ptr<3> -> i64
443+
! CHECK: %{{.*}} = nvvm.mbarrier.arrive %[[SHARED_PTR]] : !llvm.ptr<3> -> i64
444444

445445
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
446446
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 56 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -658,8 +658,8 @@ def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">,
658658
}
659659

660660
def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
661-
Results<(outs LLVM_Type:$res)>,
662-
Arguments<(ins LLVM_AnyPointer:$addr)> {
661+
Results<(outs I64:$res)>,
662+
Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr)> {
663663
let summary = "MBarrier Arrive Operation";
664664
let description = [{
665665
The `nvvm.mbarrier.arrive` operation performs an arrive-on operation on the
@@ -676,36 +676,32 @@ def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
676676
value are implementation-specific.
677677

678678
The operation takes the following operand:
679-
- `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
680-
addressing, but the address must still be in the shared memory space.
679+
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
680+
must be a pointer to generic or shared::cta memory. When it is generic, the
681+
underlying address must be within the shared::cta memory space; otherwise
682+
the behavior is undefined.
681683

682684
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
683685
}];
684-
string llvmBuilder = [{
685-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive, {$addr});
686-
}];
687686
let assemblyFormat = "$addr attr-dict `:` type($addr) `->` type($res)";
688-
}
689687

690-
def NVVM_MBarrierArriveSharedOp : NVVM_Op<"mbarrier.arrive.shared">,
691-
Results<(outs LLVM_Type:$res)>,
692-
Arguments<(ins LLVM_PointerShared:$addr)> {
693-
let summary = "Shared MBarrier Arrive Operation";
694-
let description = [{
695-
This Op is the same as `nvvm.mbarrier.arrive` except that the *mbarrier object*
696-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
697-
698-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
688+
let extraClassDeclaration = [{
689+
static mlir::NVVM::IDArgPair
690+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
691+
llvm::IRBuilderBase& builder);
699692
}];
693+
700694
string llvmBuilder = [{
701-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_shared, {$addr});
695+
auto [id, args] = NVVM::MBarrierArriveOp::getIntrinsicIDAndArgs(
696+
*op, moduleTranslation, builder);
697+
$res = createIntrinsicCall(builder, id, args);
702698
}];
703-
let assemblyFormat = "$addr attr-dict `:` qualified(type($addr)) `->` type($res)";
704699
}
705700

706701
def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
707-
Results<(outs LLVM_Type:$res)>,
708-
Arguments<(ins LLVM_AnyPointer:$addr, I32:$count)> {
702+
Results<(outs I64:$res)>,
703+
Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
704+
I32:$count)> {
709705
let summary = "MBarrier Arrive No-Complete Operation";
710706
let description = [{
711707
The `nvvm.mbarrier.arrive.nocomplete` operation performs an arrive-on operation
@@ -723,33 +719,29 @@ def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
723719
captures the phase of the *mbarrier object* prior to the arrive-on operation.
724720

725721
The operation takes the following operands:
726-
- `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
727-
addressing, but the address must still be in the shared memory space.
722+
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
723+
must be a pointer to generic or shared::cta memory. When it is generic, the
724+
underlying address must be within the shared::cta memory space; otherwise
725+
the behavior is undefined.
728726
- `count`: Integer specifying the count argument to the arrive-on operation.
729727
Must be in the valid range as specified in the *mbarrier object* contents.
730728

731729
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
732730
}];
733-
string llvmBuilder = [{
734-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete, {$addr, $count});
735-
}];
736-
let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
737-
}
738731

739-
def NVVM_MBarrierArriveNocompleteSharedOp : NVVM_Op<"mbarrier.arrive.nocomplete.shared">,
740-
Results<(outs LLVM_Type:$res)>,
741-
Arguments<(ins LLVM_PointerShared:$addr, I32:$count)> {
742-
let summary = "Shared MBarrier Arrive No-Complete Operation";
743-
let description = [{
744-
This Op is the same as `nvvm.mbarrier.arrive.nocomplete` except that the *mbarrier object*
745-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
732+
let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
746733

747-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
734+
let extraClassDeclaration = [{
735+
static mlir::NVVM::IDArgPair
736+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
737+
llvm::IRBuilderBase& builder);
748738
}];
739+
749740
string llvmBuilder = [{
750-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared, {$addr, $count});
741+
auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
742+
*op, moduleTranslation, builder);
743+
$res = createIntrinsicCall(builder, id, args);
751744
}];
752-
let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
753745
}
754746

755747
def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">,
@@ -898,8 +890,9 @@ def NVVM_MBarrierTryWaitParitySharedOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.p
898890
}
899891

900892
def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
901-
Results<(outs LLVM_Type:$res)>,
902-
Arguments<(ins LLVM_AnyPointer:$addr, LLVM_Type:$state)> {
893+
Results<(outs I1:$res)>,
894+
Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
895+
I64:$state)> {
903896
let summary = "MBarrier Non-Blocking Test Wait Operation";
904897
let description = [{
905898
The `nvvm.mbarrier.test.wait` operation performs a non-blocking test for the
@@ -946,26 +939,20 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
946939

947940
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
948941
}];
949-
string llvmBuilder = [{
950-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait, {$addr, $state});
951-
}];
952-
let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
953-
}
954942

955-
def NVVM_MBarrierTestWaitSharedOp : NVVM_Op<"mbarrier.test.wait.shared">,
956-
Results<(outs LLVM_Type:$res)>,
957-
Arguments<(ins LLVM_PointerShared:$addr, LLVM_Type:$state)> {
958-
let summary = "Shared MBarrier Non-Blocking Test Wait Operation";
959-
let description = [{
960-
This Op is the same as `nvvm.mbarrier.test.wait` except that the *mbarrier object*
961-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
943+
let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
962944

963-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
945+
let extraClassDeclaration = [{
946+
static mlir::NVVM::IDArgPair
947+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
948+
llvm::IRBuilderBase& builder);
964949
}];
950+
965951
string llvmBuilder = [{
966-
$res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait_shared, {$addr, $state});
952+
auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
953+
*op, moduleTranslation, builder);
954+
$res = createIntrinsicCall(builder, id, args);
967955
}];
968-
let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)";
969956
}
970957

971958
//===----------------------------------------------------------------------===//
@@ -1541,47 +1528,30 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> {
15411528
The `cp.async.mbarrier.arrive` Op makes the *mbarrier object* track
15421529
all prior cp.async operations initiated by the executing thread.
15431530
The `addr` operand specifies the address of the *mbarrier object*
1544-
in generic address space. The `noinc` attr impacts how the
1545-
mbarrier's state is updated.
1531+
in generic or shared::cta address space. When it is generic, the
1532+
underlying memory should fall within the shared::cta space;
1533+
otherwise the behavior is undefined. The `noinc` attr impacts
1534+
how the mbarrier's state is updated.
15461535

15471536
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
15481537
}];
1549-
let assemblyFormat = "$addr attr-dict `:` type(operands)";
15501538

15511539
let arguments = (ins
1552-
LLVM_AnyPointer:$addr, DefaultValuedAttr<I1Attr, "0">:$noinc);
1553-
1554-
string llvmBuilder = [{
1555-
auto intId = $noinc ?
1556-
llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc :
1557-
llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive;
1558-
1559-
createIntrinsicCall(builder, intId, {$addr});
1560-
}];
1561-
}
1540+
AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
1541+
DefaultValuedAttr<I1Attr, "0">:$noinc);
15621542

1563-
def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared"> {
1564-
let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive.shared";
1565-
let description = [{
1566-
The `cp.async.mbarrier.arrive.shared` Op makes the *mbarrier object*
1567-
track all prior cp.async operations initiated by the executing thread.
1568-
The `addr` operand specifies the address of the *mbarrier object* in
1569-
shared memory. The `noinc` attr impacts how the mbarrier's state
1570-
is updated.
1571-
1572-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
1573-
}];
15741543
let assemblyFormat = "$addr attr-dict `:` type(operands)";
15751544

1576-
let arguments = (ins
1577-
LLVM_PointerShared:$addr, DefaultValuedAttr<I1Attr, "0">:$noinc);
1545+
let extraClassDeclaration = [{
1546+
static mlir::NVVM::IDArgPair
1547+
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
1548+
llvm::IRBuilderBase& builder);
1549+
}];
15781550

15791551
string llvmBuilder = [{
1580-
auto intId = $noinc ?
1581-
llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared :
1582-
llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared;
1583-
1584-
createIntrinsicCall(builder, intId, {$addr});
1552+
auto [id, args] = NVVM::CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
1553+
*op, moduleTranslation, builder);
1554+
createIntrinsicCall(builder, id, args);
15851555
}];
15861556
}
15871557

mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -865,13 +865,7 @@ struct NVGPUMBarrierArriveLowering
865865
adaptor.getMbarId(), rewriter);
866866
Type tokenType = getTypeConverter()->convertType(
867867
nvgpu::MBarrierTokenType::get(op->getContext()));
868-
if (isMbarrierShared(op.getBarriers().getType())) {
869-
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveSharedOp>(op, tokenType,
870-
barrier);
871-
} else {
872-
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveOp>(op, tokenType,
873-
barrier);
874-
}
868+
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveOp>(op, tokenType, barrier);
875869
return success();
876870
}
877871
};
@@ -892,13 +886,8 @@ struct NVGPUMBarrierArriveNoCompleteLowering
892886
Type tokenType = getTypeConverter()->convertType(
893887
nvgpu::MBarrierTokenType::get(op->getContext()));
894888
Value count = truncToI32(b, adaptor.getCount());
895-
if (isMbarrierShared(op.getBarriers().getType())) {
896-
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteSharedOp>(
897-
op, tokenType, barrier, count);
898-
} else {
899-
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteOp>(
900-
op, tokenType, barrier, count);
901-
}
889+
rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveNocompleteOp>(
890+
op, tokenType, barrier, count);
902891
return success();
903892
}
904893
};
@@ -915,13 +904,8 @@ struct NVGPUMBarrierTestWaitLowering
915904
getMbarrierPtr(b, op.getBarriers().getType(), adaptor.getBarriers(),
916905
adaptor.getMbarId(), rewriter);
917906
Type retType = rewriter.getI1Type();
918-
if (isMbarrierShared(op.getBarriers().getType())) {
919-
rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitSharedOp>(
920-
op, retType, barrier, adaptor.getToken());
921-
} else {
922-
rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitOp>(
923-
op, retType, barrier, adaptor.getToken());
924-
}
907+
rewriter.replaceOpWithNewOp<NVVM::MBarrierTestWaitOp>(op, retType, barrier,
908+
adaptor.getToken());
925909
return success();
926910
}
927911
};

mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1752,15 +1752,21 @@ std::string NVVM::MBarrierInitOp::getPtx() {
17521752
// getIntrinsicID/getIntrinsicIDAndArgs methods
17531753
//===----------------------------------------------------------------------===//
17541754

1755+
static bool isPtrInAddrSpace(mlir::Value ptr, NVVMMemorySpace targetAS) {
1756+
auto ptrTy = llvm::cast<LLVM::LLVMPointerType>(ptr.getType());
1757+
return ptrTy.getAddressSpace() == static_cast<unsigned>(targetAS);
1758+
}
1759+
1760+
static bool isPtrInSharedCTASpace(mlir::Value ptr) {
1761+
return isPtrInAddrSpace(ptr, NVVMMemorySpace::Shared);
1762+
}
1763+
17551764
mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
17561765
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
17571766
auto thisOp = cast<NVVM::MBarrierInitOp>(op);
1758-
unsigned addressSpace =
1759-
llvm::cast<LLVM::LLVMPointerType>(thisOp.getAddr().getType())
1760-
.getAddressSpace();
1761-
llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared)
1762-
? llvm::Intrinsic::nvvm_mbarrier_init_shared
1763-
: llvm::Intrinsic::nvvm_mbarrier_init;
1767+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1768+
llvm::Intrinsic::ID id = isShared ? llvm::Intrinsic::nvvm_mbarrier_init_shared
1769+
: llvm::Intrinsic::nvvm_mbarrier_init;
17641770

17651771
// Fill the Intrinsic Args
17661772
llvm::SmallVector<llvm::Value *> args;
@@ -1773,16 +1779,72 @@ mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
17731779
mlir::NVVM::IDArgPair MBarrierInvalOp::getIntrinsicIDAndArgs(
17741780
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
17751781
auto thisOp = cast<NVVM::MBarrierInvalOp>(op);
1776-
unsigned addressSpace =
1777-
llvm::cast<LLVM::LLVMPointerType>(thisOp.getAddr().getType())
1778-
.getAddressSpace();
1779-
llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared)
1782+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1783+
llvm::Intrinsic::ID id = isShared
17801784
? llvm::Intrinsic::nvvm_mbarrier_inval_shared
17811785
: llvm::Intrinsic::nvvm_mbarrier_inval;
17821786

17831787
return {id, {mt.lookupValue(thisOp.getAddr())}};
17841788
}
17851789

1790+
mlir::NVVM::IDArgPair MBarrierArriveOp::getIntrinsicIDAndArgs(
1791+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
1792+
auto thisOp = cast<NVVM::MBarrierArriveOp>(op);
1793+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1794+
llvm::Intrinsic::ID id = isShared
1795+
? llvm::Intrinsic::nvvm_mbarrier_arrive_shared
1796+
: llvm::Intrinsic::nvvm_mbarrier_arrive;
1797+
1798+
return {id, {mt.lookupValue(thisOp.getAddr())}};
1799+
}
1800+
1801+
mlir::NVVM::IDArgPair MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
1802+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
1803+
auto thisOp = cast<NVVM::MBarrierArriveNocompleteOp>(op);
1804+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1805+
llvm::Intrinsic::ID id =
1806+
isShared ? llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared
1807+
: llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete;
1808+
// Fill the Intrinsic Args
1809+
llvm::SmallVector<llvm::Value *> args;
1810+
args.push_back(mt.lookupValue(thisOp.getAddr()));
1811+
args.push_back(mt.lookupValue(thisOp.getCount()));
1812+
1813+
return {id, std::move(args)};
1814+
}
1815+
1816+
mlir::NVVM::IDArgPair MBarrierTestWaitOp::getIntrinsicIDAndArgs(
1817+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
1818+
auto thisOp = cast<NVVM::MBarrierTestWaitOp>(op);
1819+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1820+
llvm::Intrinsic::ID id = isShared
1821+
? llvm::Intrinsic::nvvm_mbarrier_test_wait_shared
1822+
: llvm::Intrinsic::nvvm_mbarrier_test_wait;
1823+
// Fill the Intrinsic Args
1824+
llvm::SmallVector<llvm::Value *> args;
1825+
args.push_back(mt.lookupValue(thisOp.getAddr()));
1826+
args.push_back(mt.lookupValue(thisOp.getState()));
1827+
1828+
return {id, std::move(args)};
1829+
}
1830+
1831+
mlir::NVVM::IDArgPair CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs(
1832+
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
1833+
auto thisOp = cast<NVVM::CpAsyncMBarrierArriveOp>(op);
1834+
bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
1835+
1836+
llvm::Intrinsic::ID id;
1837+
if (thisOp.getNoinc()) {
1838+
id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared
1839+
: llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc;
1840+
} else {
1841+
id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared
1842+
: llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive;
1843+
}
1844+
1845+
return {id, {mt.lookupValue(thisOp.getAddr())}};
1846+
}
1847+
17861848
#define CP_ASYNC_ID_IMPL(mod, size, suffix) \
17871849
llvm::Intrinsic::nvvm_cp_async_##mod##_shared_global_##size##suffix
17881850

0 commit comments

Comments
 (0)