|
| 1 | +From 3c1ac713817946d3475fbaf02b9e9d5e67823e89 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Oliver Stannard < [email protected]> |
| 3 | +Date: Thu, 10 Oct 2024 10:17:50 +0200 |
| 4 | +Subject: [DAGISel] Keep flags when converting FP load/store to integer |
| 5 | + (#111679) |
| 6 | + |
| 7 | +This DAG combine replaces a floating-point load/store pair which has no |
| 8 | +other uses with an integer one, but did not copy the memory operand |
| 9 | +flags to the new instructions, resulting in it dropping the volatile |
| 10 | +flag. This optimisation is still valid if one or both of the |
| 11 | +instructions is volatile, so we can copy over the whole |
| 12 | +MachineMemOperand to generate volatile integer loads and stores where |
| 13 | +needed. |
| 14 | +--- |
| 15 | + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++---- |
| 16 | + .../CodeGen/ARM/load-store-pair-volatile.ll | 24 +++++++++++++++++++ |
| 17 | + llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll | 10 ++++---- |
| 18 | + 3 files changed, 33 insertions(+), 11 deletions(-) |
| 19 | + create mode 100644 llvm/test/CodeGen/ARM/load-store-pair-volatile.ll |
| 20 | + |
| 21 | +diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |
| 22 | +index 7b1f1dc40211..030670c98af8 100644 |
| 23 | +--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |
| 24 | ++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |
| 25 | +@@ -20045,13 +20045,11 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { |
| 26 | + !FastLD || !FastST) |
| 27 | + return SDValue(); |
| 28 | + |
| 29 | +- SDValue NewLD = |
| 30 | +- DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), |
| 31 | +- LD->getPointerInfo(), LD->getAlign()); |
| 32 | ++ SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), |
| 33 | ++ LD->getBasePtr(), LD->getMemOperand()); |
| 34 | + |
| 35 | +- SDValue NewST = |
| 36 | +- DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), |
| 37 | +- ST->getPointerInfo(), ST->getAlign()); |
| 38 | ++ SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD, |
| 39 | ++ ST->getBasePtr(), ST->getMemOperand()); |
| 40 | + |
| 41 | + AddToWorklist(NewLD.getNode()); |
| 42 | + AddToWorklist(NewST.getNode()); |
| 43 | +diff --git a/llvm/test/CodeGen/ARM/load-store-pair-volatile.ll b/llvm/test/CodeGen/ARM/load-store-pair-volatile.ll |
| 44 | +new file mode 100644 |
| 45 | +index 000000000000..6278672d9e23 |
| 46 | +--- /dev/null |
| 47 | ++++ b/llvm/test/CodeGen/ARM/load-store-pair-volatile.ll |
| 48 | +@@ -0,0 +1,24 @@ |
| 49 | ++; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| 50 | ++; RUN: llc -mtriple=arm-none-eabi -stop-after=finalize-isel < %s | FileCheck %s |
| 51 | ++ |
| 52 | ++define void @test(ptr %vol_one, ptr %p_in, ptr %p_out, i32 %n) { |
| 53 | ++ ; CHECK-LABEL: name: test |
| 54 | ++ ; CHECK: bb.0.entry: |
| 55 | ++ ; CHECK-NEXT: liveins: $r0, $r1, $r2 |
| 56 | ++ ; CHECK-NEXT: {{ $}} |
| 57 | ++ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r2 |
| 58 | ++ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r1 |
| 59 | ++ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0 |
| 60 | ++ ; CHECK-NEXT: [[LDRi12_:%[0-9]+]]:gpr = LDRi12 [[COPY1]], 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.p_in) |
| 61 | ++ ; CHECK-NEXT: STRi12 killed [[LDRi12_]], [[COPY2]], 0, 14 /* CC::al */, $noreg :: (volatile store (s32) into %ir.vol_one) |
| 62 | ++ ; CHECK-NEXT: [[LDRi12_1:%[0-9]+]]:gpr = LDRi12 [[COPY2]], 4, 14 /* CC::al */, $noreg :: (volatile load (s32) from %ir.vol_two) |
| 63 | ++ ; CHECK-NEXT: STRi12 killed [[LDRi12_1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.p_out) |
| 64 | ++ ; CHECK-NEXT: MOVPCLR 14 /* CC::al */, $noreg |
| 65 | ++entry: |
| 66 | ++ %vol_two = getelementptr inbounds i8, ptr %vol_one, i32 4 |
| 67 | ++ %a = load float, ptr %p_in, align 4 |
| 68 | ++ store volatile float %a, ptr %vol_one, align 4 |
| 69 | ++ %b = load volatile float, ptr %vol_two, align 4 |
| 70 | ++ store float %b, ptr %p_out, align 4 |
| 71 | ++ ret void |
| 72 | ++} |
| 73 | +diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll |
| 74 | +index ccc36530c795..150fa91524ab 100644 |
| 75 | +--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll |
| 76 | ++++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll |
| 77 | +@@ -1442,8 +1442,8 @@ define void @caller_fpr_stack() { |
| 78 | + ; 32BIT-NEXT: renamable $r4 = LWZtoc @f14, $r2 :: (load (s32) from got) |
| 79 | + ; 32BIT-NEXT: renamable $f0 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d15) |
| 80 | + ; 32BIT-NEXT: renamable $r5 = LWZtoc @f16, $r2 :: (load (s32) from got) |
| 81 | +- ; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r4 :: (load (s32) from @f14) |
| 82 | +- ; 32BIT-NEXT: renamable $r4 = LWZ 0, killed renamable $r5 :: (load (s32) from @f16) |
| 83 | ++ ; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r4 :: (dereferenceable load (s32) from @f14) |
| 84 | ++ ; 32BIT-NEXT: renamable $r4 = LWZ 0, killed renamable $r5 :: (dereferenceable load (s32) from @f16) |
| 85 | + ; 32BIT-NEXT: ADJCALLSTACKDOWN 144, 0, implicit-def dead $r1, implicit $r1 |
| 86 | + ; 32BIT-NEXT: renamable $r5 = LI 0 |
| 87 | + ; 32BIT-NEXT: renamable $r6 = LIS 16352 |
| 88 | +@@ -1532,9 +1532,9 @@ define void @caller_fpr_stack() { |
| 89 | + ; 64BIT-NEXT: renamable $x3 = LDtoc @f14, $x2 :: (load (s64) from got) |
| 90 | + ; 64BIT-NEXT: renamable $x4 = LDtoc @d15, $x2 :: (load (s64) from got) |
| 91 | + ; 64BIT-NEXT: renamable $x5 = LDtoc @f16, $x2 :: (load (s64) from got) |
| 92 | +- ; 64BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $x3 :: (load (s32) from @f14) |
| 93 | +- ; 64BIT-NEXT: renamable $x4 = LD 0, killed renamable $x4 :: (load (s64) from @d15) |
| 94 | +- ; 64BIT-NEXT: renamable $r5 = LWZ 0, killed renamable $x5 :: (load (s32) from @f16) |
| 95 | ++ ; 64BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $x3 :: (dereferenceable load (s32) from @f14) |
| 96 | ++ ; 64BIT-NEXT: renamable $x4 = LD 0, killed renamable $x4 :: (dereferenceable load (s64) from @d15) |
| 97 | ++ ; 64BIT-NEXT: renamable $r5 = LWZ 0, killed renamable $x5 :: (dereferenceable load (s32) from @f16) |
| 98 | + ; 64BIT-NEXT: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 |
| 99 | + ; 64BIT-NEXT: renamable $x6 = LDtocCPT %const.0, $x2 :: (load (s64) from got) |
| 100 | + ; 64BIT-NEXT: STW killed renamable $r5, 168, $x1 :: (store (s32)) |
| 101 | +-- |
| 102 | +2.34.1 |
| 103 | + |
0 commit comments