diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index ecad1a16eb6c5..80108a85d9e3c 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -400,6 +400,55 @@ def NVVM_FenceScClusterOp : NVVM_Op<"fence.sc.cluster"> { let assemblyFormat = "attr-dict"; } +def SharedSpaceCTA : I32EnumAttrCase<"shared_cta", 0, "cta">; +def SharedSpaceCluster : I32EnumAttrCase<"shared_cluster", 1, "cluster">; +def SharedSpace : I32EnumAttr<"SharedSpace", "Shared memory space", + [SharedSpaceCTA, SharedSpaceCluster]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} +def SharedSpaceAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + +def ProxyAlias : I32EnumAttrCase<"alias", 0, "alias">; +def ProxyAsync : I32EnumAttrCase<"async", 1, "async">; +def ProxyAsyncGlobal : I32EnumAttrCase<"async_global", 2, "async.global">; +def ProxyAsyncShared : I32EnumAttrCase<"async_shared", 3, "async.shared">; +def ProxyKind : I32EnumAttr<"ProxyKind", "Proxy kind", + [ProxyAlias, ProxyAsync, ProxyAsyncGlobal, ProxyAsyncShared]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} + +def ProxyKindAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + +def NVVM_FenceProxyOp : NVVM_PTXBuilder_Op<"fence.proxy">, + Arguments<(ins ProxyKindAttr:$kind, + OptionalAttr:$space)> { + let description = [{ + Fence operation with proxy to establish an ordering between memory accesses + that may happen through different proxies. + [For more information, see PTX ISA] + (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar) + }]; + + let assemblyFormat = "attr-dict"; + let extraClassDefinition = [{ + std::string $cppClass::getPtx() { + std::string ptx = "fence.proxy."; + ptx += stringifyProxyKind(getKind()); + if(getKind() == NVVM::ProxyKind::async_shared) + { ptx += "::"; ptx += stringifySharedSpace(getSpace().value()); } + ptx += ";"; + return ptx; + } + }]; + let hasVerifier = 1; +} + def SetMaxRegisterActionIncrease : I32EnumAttrCase<"increase", 0>; def SetMaxRegisterActionDecrease : I32EnumAttrCase<"decrease", 1>; def SetMaxRegisterAction : I32EnumAttr<"SetMaxRegisterAction", "NVVM set max register action", diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index ff6b5da78bdfe..4f5d71e10f68c 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -22,6 +22,7 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Diagnostics.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Operation.h" @@ -1006,6 +1007,15 @@ void NVVM::WgmmaMmaAsyncOp::getAsmValues( mlir::NVVM::PTXRegisterMod::Read}); } } +LogicalResult NVVM::FenceProxyOp::verify() { + if (getKind() == NVVM::ProxyKind::async_shared && !getSpace().has_value()) { + return emitOpError() << "async_shared fence requires space attribute"; + } + if (getKind() != NVVM::ProxyKind::async_shared && getSpace().has_value()) { + return emitOpError() << "only async_shared fence can have space attribute"; + } + return success(); +} LogicalResult NVVM::SetMaxRegisterOp::verify() { if (getRegCount() % 8) diff --git a/mlir/test/Conversion/NVVMToLLVM/invalid.mlir b/mlir/test/Conversion/NVVMToLLVM/invalid.mlir index 1328755f69d89..34c8de9f7ed8c 100644 --- a/mlir/test/Conversion/NVVMToLLVM/invalid.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/invalid.mlir @@ -147,3 +147,19 @@ func.func @set_max_register() { nvvm.setmaxregister decrease 51 func.return } + +// ----- + +func.func @fence_proxy() { + // expected-error @+1 {{op only async_shared fence can have space attribute}} + nvvm.fence.proxy { kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + func.return +} + +// ----- + +func.func @fence_proxy() { + // expected-error @+1 {{op async_shared fence requires space attribute}} + nvvm.fence.proxy { kind = #nvvm.proxy_kind} + func.return +} diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index 5482cc194192d..1b41704409d3e 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -629,3 +629,18 @@ func.func @cp_bulk_commit() { nvvm.cp.async.bulk.commit.group func.return } +// ----- + +func.func @fence_proxy() { + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.alias;", "" : () -> () + nvvm.fence.proxy { kind = #nvvm.proxy_kind} + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.async;", "" : () -> () + nvvm.fence.proxy { kind = #nvvm.proxy_kind} + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.async.global;", "" : () -> () + nvvm.fence.proxy { kind = #nvvm.proxy_kind} + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.async.shared::cta;", "" : () -> () + nvvm.fence.proxy { kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "fence.proxy.async.shared::cluster;", "" : () -> () + nvvm.fence.proxy { kind = #nvvm.proxy_kind, space = #nvvm.shared_space} + func.return +}