From b1ee138e7511dac2a9352f5441fef71ae9260d9a Mon Sep 17 00:00:00 2001 From: guochen2 Date: Tue, 6 Aug 2024 14:43:46 -0400 Subject: [PATCH 1/4] [AMDGPU][CodeGen] support v_mov_b16 and v_swap_b16 in true16 format --- llvm/lib/Target/AMDGPU/SIInstructions.td | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index c41850ab55f75..2fcdcbd6b5ba1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2192,20 +2192,6 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in { } } -let True16Predicate = UseRealTrue16Insts in { - def : GCNPat < - (VGPRImm<(i16 imm)>:$imm), - (V_MOV_B16_t16_e64 0, imm:$imm, 0) - >; - - foreach vt = [f16, bf16] in { - def : GCNPat < - (VGPRImm<(vt fpimm)>:$imm), - (V_MOV_B16_t16_e64 0, $imm, 0) - >; - } -} - // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit // immediate and wil be expanded as needed, but we will only use these patterns // for values which can be encoded. From b01863a05ea636a553fda11fb107b03e6d206151 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 8 Aug 2024 11:36:10 -0400 Subject: [PATCH 2/4] added back the missing imm pattern for mov_b16 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 2fcdcbd6b5ba1..c41850ab55f75 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2192,6 +2192,20 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in { } } +let True16Predicate = UseRealTrue16Insts in { + def : GCNPat < + (VGPRImm<(i16 imm)>:$imm), + (V_MOV_B16_t16_e64 0, imm:$imm, 0) + >; + + foreach vt = [f16, bf16] in { + def : GCNPat < + (VGPRImm<(vt fpimm)>:$imm), + (V_MOV_B16_t16_e64 0, $imm, 0) + >; + } +} + // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit // immediate and wil be expanded as needed, but we will only use these patterns // for values which can be encoded. From acfb65ae6dee17685b87c4b70582bcd2ace8da85 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Mon, 12 Aug 2024 13:42:25 -0400 Subject: [PATCH 3/4] [AMDGPU][True16] fix a bug in codeGen causing e64 with wrong vgpr type to shrink --- llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 155747551471e..5d38cafd73dd9 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -1048,7 +1048,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { MachineFunctionProperties::Property::NoVRegs)) continue; - if (ST->useRealTrue16Insts() && AMDGPU::isTrue16Inst(MI.getOpcode()) && + if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) && !shouldShrinkTrue16(MI)) continue; From 6fb5015b4b4128cc135b6f998fd4384b376015fe Mon Sep 17 00:00:00 2001 From: guochen2 Date: Mon, 12 Aug 2024 15:42:48 -0400 Subject: [PATCH 4/4] added a mir test for shrinking Lo128 register type --- llvm/test/CodeGen/AMDGPU/shrink-true16.mir | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-true16.mir diff --git a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir new file mode 100644 index 0000000000000..4b2e3951ce4c1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir @@ -0,0 +1,28 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-shrink-instructions -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1100 %s + +--- +name: 16bit_lo128_shrink +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr127 + ; GFX1100-LABEL: name: 16bit_lo128_shrink + ; GFX1100: liveins: $vgpr127 + ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc, implicit $exec, implicit $exec + $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec +... + +--- +name: 16bit_lo128_no_shrink +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr128 + ; GFX1100-LABEL: name: 16bit_lo128_no_shrink + ; GFX1100: liveins: $vgpr128 + ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec + $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec +...