From 1833ddf13cba1cadfb0beba35487839e1c8051b1 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Wed, 19 Feb 2025 00:20:32 +0000 Subject: [PATCH 1/3] pre-commit tests --- .../CodeGen/NVPTX/addrspacecast-folding.ll | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll new file mode 100644 index 0000000000000..05eb0385eb571 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mcpu=sm_20 -O0 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 -O0 | %ptxas-verify %} + +target triple = "nvptx64-unknown-unknown" + +define ptr @test1(ptr %p) { +; CHECK-LABEL: test1( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test1_param_0]; +; CHECK-NEXT: cvta.to.local.u64 %rd2, %rd1; +; CHECK-NEXT: cvta.local.u64 %rd3, %rd2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; + %a = addrspacecast ptr %p to ptr addrspace(5) + %b = addrspacecast ptr addrspace(5) %a to ptr + ret ptr %b +} + +define ptr addrspace(1) @test2(ptr addrspace(5) %p) { +; CHECK-LABEL: test2( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [test2_param_0]; +; CHECK-NEXT: cvta.local.u64 %rd2, %rd1; +; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; + %a = addrspacecast ptr addrspace(5) %p to ptr + %b = addrspacecast ptr %a to ptr addrspace(1) + ret ptr addrspace(1) %b +} From a304a777b8e18513d1eed6ee9a469a1318905190 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Wed, 19 Feb 2025 00:21:55 +0000 Subject: [PATCH 2/3] [DAGCombiner] Add very basic folds 
for ADDRSPACECAST --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 21 +++++++++++++++++++ .../CodeGen/NVPTX/addrspacecast-folding.ll | 11 +++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bc7cdf38dbc2a..1ed3da6e2dd12 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -488,6 +488,7 @@ namespace { SDValue visitTRUNCATE(SDNode *N); SDValue visitTRUNCATE_USAT_U(SDNode *N); SDValue visitBITCAST(SDNode *N); + SDValue visitADDRSPACECAST(SDNode *N); SDValue visitFREEZE(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); @@ -1920,6 +1921,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N); case ISD::BITCAST: return visitBITCAST(N); + case ISD::ADDRSPACECAST: return visitADDRSPACECAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); case ISD::STRICT_FADD: return visitSTRICT_FADD(N); @@ -16054,6 +16056,25 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitADDRSPACECAST(SDNode *N) { + auto *ASCN1 = cast<AddrSpaceCastSDNode>(N); + + if (auto *ASCN2 = dyn_cast<AddrSpaceCastSDNode>(ASCN1->getOperand(0))) { + assert(ASCN2->getDestAddressSpace() == ASCN1->getSrcAddressSpace()); + + // Fold asc[B -> A](asc[A -> B](x)) -> x + if (ASCN1->getDestAddressSpace() == ASCN2->getSrcAddressSpace()) + return ASCN2->getOperand(0); + + // Fold asc[B -> C](asc[A -> B](x)) -> asc[A -> C](x) + return DAG.getAddrSpaceCast( + SDLoc(N), N->getValueType(0), ASCN2->getOperand(0), + ASCN2->getSrcAddressSpace(), ASCN1->getDestAddressSpace()); + } + + return SDValue(); +} + SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { EVT VT = N->getValueType(0); return CombineConsecutiveLoads(N, VT); diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll 
b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll index 05eb0385eb571..11c2b6782e0d3 100644 --- a/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll +++ b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll @@ -7,13 +7,11 @@ target triple = "nvptx64-unknown-unknown" define ptr @test1(ptr %p) { ; CHECK-LABEL: test1( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u64 %rd1, [test1_param_0]; -; CHECK-NEXT: cvta.to.local.u64 %rd2, %rd1; -; CHECK-NEXT: cvta.local.u64 %rd3, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; ; CHECK-NEXT: ret; %a = addrspacecast ptr %p to ptr addrspace(5) %b = addrspacecast ptr addrspace(5) %a to ptr @@ -23,13 +21,10 @@ define ptr @test1(ptr %p) { define ptr addrspace(1) @test2(ptr addrspace(5) %p) { ; CHECK-LABEL: test2( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u64 %rd1, [test2_param_0]; -; CHECK-NEXT: cvta.local.u64 %rd2, %rd1; -; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; ; CHECK-NEXT: ret; %a = addrspacecast ptr addrspace(5) %p to ptr %b = addrspacecast ptr %a to ptr addrspace(1) From 6dc9383f637f7b656e8f06cc87227e694ba5ea93 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Wed, 19 Feb 2025 16:06:22 +0000 Subject: [PATCH 3/3] fixup tests --- .../codegen-prepare-addrspacecast-non-null.ll | 32 +++++++++---------- llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll | 6 ++-- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll index 3216e71e6221a..9b79d528c14a2 100644 --- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll +++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll @@ 
-320,13 +320,6 @@ define i32 @cast_private_to_flat_to_local(ptr addrspace(5) %private.ptr) { ; DAGISEL-ASM-LABEL: cast_private_to_flat_to_local: ; DAGISEL-ASM: ; %bb.0: ; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base -; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5 -; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 -; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc -; DAGISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc ; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0 ; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0) ; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31] @@ -359,15 +352,22 @@ define i32 @cast_private_to_flat_to_global(ptr addrspace(6) %const32.ptr) { ; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4 ; OPT-NEXT: ret i32 [[LOAD]] ; -; ASM-LABEL: cast_private_to_flat_to_global: -; ASM: ; %bb.0: -; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ASM-NEXT: v_mov_b32_e32 v1, 0 -; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; ASM-NEXT: ds_read_b32 v0, v0 -; ASM-NEXT: s_waitcnt lgkmcnt(0) -; ASM-NEXT: s_setpc_b64 s[30:31] +; DAGISEL-ASM-LABEL: cast_private_to_flat_to_global: +; DAGISEL-ASM: ; %bb.0: +; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0 +; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-ASM-LABEL: cast_private_to_flat_to_global: +; GISEL-ASM: ; %bb.0: +; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GISEL-ASM-NEXT: ds_read_b32 v0, v0 +; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-ASM-NEXT: s_setpc_b64 s[30:31] %flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr 
%local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3) %load = load volatile i32, ptr addrspace(3) %local.ptr diff --git a/llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll b/llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll index a95f68b5e118d..b46069c782968 100644 --- a/llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll +++ b/llvm/test/CodeGen/SystemZ/mixed-ptr-sizes.ll @@ -332,9 +332,7 @@ define signext i32 @setlength() { ; CHECK: lgr [[MALLOC:[0-9]+]],3 ; CHECK: basr 7,6 ; CHECK: lgr [[LENGTH:[0-9]+]],3 -; CHECK: la [[ADDR:[0-9]+]],4([[MALLOC]]) -; CHECK: llgtr [[ADDR]],[[ADDR]] -; CHECK: stg [[LENGTH]],0([[ADDR]]) +; CHECK: stg [[LENGTH]],4([[MALLOC]]) entry: %call = tail call ptr @__malloc31(i64 noundef 8) %call1 = tail call signext i32 @foo() @@ -357,7 +355,7 @@ define signext i32 @setlength2() { ; CHECK: basr 7,6 ; CHECK: lgr [[LENGTH:[0-9]+]],3 ; CHECK: ahi [[MALLOC]],4 -; CHECK: llgtr [[ADDR]],[[MALLOC]] +; CHECK: llgtr [[ADDR:[0-9]+]],[[MALLOC]] ; CHECK: stg [[LENGTH]],0([[ADDR]]) entry: %call = tail call ptr addrspace(1) @domalloc(i64 noundef 8)