From 7f91f1cd85fd5759423e304293dcfd0e33de63bd Mon Sep 17 00:00:00 2001 From: Nabeel Omer Date: Wed, 20 Nov 2024 14:21:39 +0000 Subject: [PATCH 1/5] [X86] Fix decoding for vinsertps immediate operand The relevant bit from the Intel SDM for vinsertps semantics: ``` IF (SRC = REG) THEN COUNT_S := imm8[7:6] ELSE COUNT_S := 0 ``` This is now taken into account. --- llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp | 10 ++++++++-- llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp | 4 ++-- llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h | 2 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/MC/X86/vinsertps_decode.s | 7 +++++++ 5 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 llvm/test/MC/X86/vinsertps_decode.s diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index 70c71273d270f..a952ba3e3b0cc 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -1122,7 +1122,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VINSERTPSrri: case X86::VINSERTPSZrri: Src2Name = getRegName(MI->getOperand(2).getReg()); - [[fallthrough]]; + DestName = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + if (MI->getOperand(NumOperands - 1).isImm()) + DecodeINSERTPSMask(MI->getOperand(NumOperands - 1).getImm(), + ShuffleMask, false); + break; + case X86::INSERTPSrmi: case X86::VINSERTPSrmi: case X86::VINSERTPSZrmi: @@ -1130,7 +1136,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(1).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) DecodeINSERTPSMask(MI->getOperand(NumOperands - 1).getImm(), - ShuffleMask); + ShuffleMask, true); break; case X86::MOVLHPSrr: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp index 82f4460a42e70..57d0a734bf6ef 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp @@ -23,7 +23,7 @@ namespace llvm { -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask, bool SrcIsMem) { // Defaults the copying the dest value. ShuffleMask.push_back(0); ShuffleMask.push_back(1); @@ -33,7 +33,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { // Decode the immediate. unsigned ZMask = Imm & 15; unsigned CountD = (Imm >> 4) & 3; - unsigned CountS = (Imm >> 6) & 3; + unsigned CountS = SrcIsMem ? 0 : (Imm >> 6) & 3; // CountS selects which input element to use. unsigned InVal = 4 + CountS; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h index 4ef9959f7a278..cb079648e7be9 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h @@ -28,7 +28,7 @@ template class SmallVectorImpl; enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 }; /// Decode a 128-bit INSERTPS instruction as a v4f32 shuffle mask. -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask, bool SrcIsMem); // Insert the bottom Len elements from a second source into a vector starting at // element Idx. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fea66e9582cfb..65c8d53d0d42d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5362,7 +5362,7 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero, assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); - DecodeINSERTPSMask(ImmN, Mask); + DecodeINSERTPSMask(ImmN, Mask, false); IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1); break; case X86ISD::EXTRQI: diff --git a/llvm/test/MC/X86/vinsertps_decode.s b/llvm/test/MC/X86/vinsertps_decode.s new file mode 100644 index 0000000000000..27ba336168064 --- /dev/null +++ b/llvm/test/MC/X86/vinsertps_decode.s @@ -0,0 +1,7 @@ +# RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s + +.intel_syntax + +# CHECK: vinsertps {{.*}} # xmm2 = xmm2[0,1,2],mem[0] + +vinsertps xmm2,xmm2,dword ptr [r14+rdi*8+0x4C],0x0B0 \ No newline at end of file From b4a5781e72475c2e3cd378adadb2d774a5bf887d Mon Sep 17 00:00:00 2001 From: Nabeel Omer Date: Wed, 20 Nov 2024 16:45:11 +0000 Subject: [PATCH 2/5] Add newline at EOF --- llvm/test/MC/X86/vinsertps_decode.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/MC/X86/vinsertps_decode.s b/llvm/test/MC/X86/vinsertps_decode.s index 27ba336168064..644f34e4f6d14 100644 --- a/llvm/test/MC/X86/vinsertps_decode.s +++ b/llvm/test/MC/X86/vinsertps_decode.s @@ -4,4 +4,4 @@ # CHECK: vinsertps {{.*}} # xmm2 = xmm2[0,1,2],mem[0] -vinsertps xmm2,xmm2,dword ptr [r14+rdi*8+0x4C],0x0B0 \ No newline at end of file +vinsertps xmm2,xmm2,dword ptr [r14+rdi*8+0x4C],0x0B0 From 48efccb103170a69b0de5abbf600a61115508b0a Mon Sep 17 00:00:00 2001 From: Nabeel Omer Date: Wed, 20 Nov 2024 17:55:22 +0000 Subject: [PATCH 3/5] Address review comments --- llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp | 8 ++++---- llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp | 3 ++- llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h | 3 ++- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/MC/X86/vinsertps_decode.s | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index a952ba3e3b0cc..9f8bc57fbc76d 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -1125,8 +1125,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodeINSERTPSMask(MI->getOperand(NumOperands - 1).getImm(), - ShuffleMask, false); + DecodeINSERTPSMask(MI->getOperand(NumOperands - 1).getImm(), ShuffleMask, + /*SrcIsMem=*/false); break; case X86::INSERTPSrmi: @@ -1135,8 +1135,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) - DecodeINSERTPSMask(MI->getOperand(NumOperands - 1).getImm(), - ShuffleMask, true); + DecodeINSERTPSMask(MI->getOperand(NumOperands - 1).getImm(), ShuffleMask, + /*SrcIsMem=*/true); break; case X86::MOVLHPSrr: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp index 57d0a734bf6ef..933fd16a5cabe 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp @@ -23,7 +23,8 @@ namespace llvm { -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask, bool SrcIsMem) { +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask, + bool SrcIsMem) { // Defaults the copying the dest value. ShuffleMask.push_back(0); ShuffleMask.push_back(1); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h index cb079648e7be9..b58e3a73a8d56 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h @@ -28,7 +28,8 @@ template class SmallVectorImpl; enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 }; /// Decode a 128-bit INSERTPS instruction as a v4f32 shuffle mask. -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask, bool SrcIsMem); +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask, + bool SrcIsMem); // Insert the bottom Len elements from a second source into a vector starting at // element Idx. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 65c8d53d0d42d..b7972f058ca27 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5362,7 +5362,7 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero, assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); - DecodeINSERTPSMask(ImmN, Mask, false); + DecodeINSERTPSMask(ImmN, Mask, /*SrcIsMem=*/false); IsUnary = IsFakeUnary = N.getOperand(0) == N.getOperand(1); break; case X86ISD::EXTRQI: diff --git a/llvm/test/MC/X86/vinsertps_decode.s b/llvm/test/MC/X86/vinsertps_decode.s index 644f34e4f6d14..fb9846490cafa 100644 --- a/llvm/test/MC/X86/vinsertps_decode.s +++ b/llvm/test/MC/X86/vinsertps_decode.s @@ -2,6 +2,6 @@ .intel_syntax -# CHECK: vinsertps {{.*}} # xmm2 = xmm2[0,1,2],mem[0] +# CHECK: vinsertps $176, 76(%r14,%rdi,8), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0] vinsertps xmm2,xmm2,dword ptr [r14+rdi*8+0x4C],0x0B0 From 50554db94a62de6c8ce3f89e0cd27b2c74e1fe90 Mon Sep 17 00:00:00 2001 From: Nabeel Omer Date: Thu, 21 Nov 2024 11:26:28 +0000 Subject: [PATCH 4/5] Add tests for sse and evex --- llvm/test/MC/X86/vinsertps_decode.s | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/test/MC/X86/vinsertps_decode.s b/llvm/test/MC/X86/vinsertps_decode.s index fb9846490cafa..60efb8fb939ed 100644 --- a/llvm/test/MC/X86/vinsertps_decode.s +++ b/llvm/test/MC/X86/vinsertps_decode.s @@ -2,6 +2,10 @@ .intel_syntax -# CHECK: vinsertps $176, 76(%r14,%rdi,8), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0] +# CHECK: insertps $176, (%rax), %xmm2 # xmm2 = xmm2[0,1,2],mem[0] +# CHECK: vinsertps $176, (%rax), %xmm2, %xmm2 # xmm2 = xmm2[0,1,2],mem[0] +# CHECK: vinsertps $176, (%rax), %xmm29, %xmm0 # xmm0 = xmm29[0,1,2],mem[0] -vinsertps xmm2,xmm2,dword ptr [r14+rdi*8+0x4C],0x0B0 +insertps xmm2, dword ptr [rax], 0x0B0 +vinsertps xmm2,xmm2,dword ptr [rax],0x0B0 +vinsertps xmm0,xmm29,dword ptr [rax],0x0B0 \ No newline at end of file From 2c55706a3ee212101a03171024857ba9b2ad0004 Mon Sep 17 00:00:00 2001 From: Nabeel Omer Date: Thu, 21 Nov 2024 11:27:54 +0000 Subject: [PATCH 5/5] Add new line at EOF --- llvm/test/MC/X86/vinsertps_decode.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/MC/X86/vinsertps_decode.s b/llvm/test/MC/X86/vinsertps_decode.s index 60efb8fb939ed..b200fb14aefff 100644 --- a/llvm/test/MC/X86/vinsertps_decode.s +++ b/llvm/test/MC/X86/vinsertps_decode.s @@ -8,4 +8,4 @@ insertps xmm2, dword ptr [rax], 0x0B0 vinsertps xmm2,xmm2,dword ptr [rax],0x0B0 -vinsertps xmm0,xmm29,dword ptr [rax],0x0B0 \ No newline at end of file +vinsertps xmm0,xmm29,dword ptr [rax],0x0B0