diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
index 0b4739214bf2f..935f0deaf9c8a 100644
--- a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
+++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
@@ -51,7 +51,6 @@ void buildCommonPassPipeline(
   pm.addPass(createConvertVectorToSCFPass());
   pm.addPass(createConvertSCFToCFPass());
   pm.addPass(createConvertNVVMToLLVMPass());
-  pm.addPass(createConvertMathToLLVMPass());
   pm.addPass(createConvertFuncToLLVMPass());
   pm.addPass(memref::createExpandStridedMetadataPass());
@@ -98,6 +97,7 @@ void buildHostPostPipeline(OpPassManager &pm,
   GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
   gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
   pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
+  pm.addPass(createConvertMathToLLVMPass());
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createCSEPass());
   pm.addPass(createReconcileUnrealizedCastsPass());
diff --git a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
new file mode 100644
index 0000000000000..07e719798b851
--- /dev/null
+++ b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
@@ -0,0 +1,30 @@
+// REQUIRES: host-supports-nvptx
+// RUN: mlir-opt %s \
+// RUN:  | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
+// RUN:  | FileCheck %s
+
+// RUN: mlir-opt %s \
+// RUN:  | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
+// RUN:  --mlir-print-ir-after=convert-gpu-to-nvvm 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK-NVVM
+
+// This test checks that the GPU region is compiled to PTX correctly by the
+// pipeline. It does not check the GPU-side IR, but it does check the host IR
+// and the generated PTX.
+
+// CHECK-LABEL: llvm.func @test_math(%arg0: f32) {
+func.func @test_math(%arg0 : f32) {
+  %c2 = arith.constant 2 : index
+  %c1 = arith.constant 1 : index
+  // CHECK: gpu.launch_func @test_math_kernel::@test_math_kernel
+  // CHECK: gpu.binary @test_math_kernel [#gpu.object<#nvvm.target
+  gpu.launch
+      blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1)
+      threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
+    // CHECK-NVVM: __nv_expf
+    %s1 = math.exp %arg0 : f32
+    gpu.printf "%f" %s1 : f32
+    gpu.terminator
+  }
+  return
+}
\ No newline at end of file