From f6658cb22f37d3ab5a7f092b0262fe8347b8cd82 Mon Sep 17 00:00:00 2001 From: Tony Varghese Date: Thu, 29 May 2025 16:33:21 +0000 Subject: [PATCH 1/5] [PowerPC][XXEVAL] Exploit xxeval instruction for cases of the ternary(A,X, and(B,C)), ternary(A,X,B), ternary(A,X,C), ternary(A,X,xor(B,C)) forms. --- llvm/lib/Target/PowerPC/PPCInstrP10.td | 310 ++++++++++++++++-- .../CodeGen/PowerPC/xxeval-vselect-x-and.ll | 82 ++--- .../CodeGen/PowerPC/xxeval-vselect-x-b.ll | 50 +-- .../CodeGen/PowerPC/xxeval-vselect-x-c.ll | 50 +-- .../CodeGen/PowerPC/xxeval-vselect-x-xor.ll | 74 ++--- 5 files changed, 354 insertions(+), 212 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index d295f35fb1dd0..aa31478589cea 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2159,8 +2159,254 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; } -class XXEvalPattern imm> : - Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} + // Defines a pattern for the XXEVAL instruction with a specific value type, + // pattern, and immediate. +class XXEvalPattern imm> : + Pat<(vt pattern), (XXEVAL $vA, $vB, $vC, imm)> {} + + // Helper class to generate binary operation DAGs for various vector types. + // For v4i32, emits (op B C). + // For other types, bitcasts operands to v4i32, applies the op, then bitcasts back. +class BinaryOpDag { + // The DAG for the binary operation. + dag OpDag = !if( !eq(vt, v4i32), + (op vt:$vB, vt:$vC), + (vt (bitconvert (op (v4i32 (bitconvert vt:$vB)), (v4i32 (bitconvert vt:$vC))))) + ); + // The DAG for the binary operation with a NOT applied to the result. + dag VnotOpDag = !if( !eq(vt, v4i32), + (vnot (op vt:$vB, vt:$vC)), + (vt (bitconvert (vnot (op (v4i32 (bitconvert vt:$vB)), (v4i32 (bitconvert vt:$vC)))))) + ); +} + + // Helper class to generate unary NOT patterns for vector types. 
+ // For v4i32, emits (vnot B) or (vnot C). + // For other types, bitcasts operand to v4i32, applies vnot, then bitcasts back. +class XXEvalUnaryNotPattern { + dag vnotB = !if( !eq(vt, v4i32), + (vnot vt:$vB), + (vt (bitconvert (vnot (v4i32 (bitconvert vt:$vB))))) + ); + dag vnotC = !if( !eq(vt, v4i32), + (vnot vt:$vC), + (vt (bitconvert (vnot (v4i32 (bitconvert vt:$vC))))) + ); +} + + // Wrapper class for binary patterns with optional NOT on the result. + // If 'not' is 0, emits the binary op; if 1, emits vnot of the binary op. +class XXEvalBinaryPattern { + dag opPat = !if( !eq(not, 0), + BinaryOpDag.OpDag, + BinaryOpDag.VnotOpDag + ); +} + +multiclass XXEvalVSelectWithXAnd baseImm> { + // Multiclass for ternary patterns of the form vselect(A, X, and(B, C)). + // vselect(A, xor(B,C), and(B,C)) => imm = baseImm = 22 + def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), + baseImm>; + // vselect(A, nor(B,C), and(B,C)) => imm = baseImm + 2 = 24 + def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), + !add(baseImm, 2)>; + // vselect(A, eqv(B,C), and(B,C)) => imm = baseImm + 3 = 25 + def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), + !add(baseImm, 3)>; + // vselect(A, not(C), and(B,C)) => imm = baseImm + 4 = 26 + def : XXEvalPattern.vnotC, XXEvalBinaryPattern.opPat), + !add(baseImm, 4)>; + // vselect(A, not(B), and(B,C)) => imm = baseImm + 6 = 28 + def : XXEvalPattern.vnotB, XXEvalBinaryPattern.opPat), + !add(baseImm, 6)>; +} + +multiclass XXEvalVSelectWithXB baseImm>{ + // Multiclass for ternary patterns of the form vselect(A, X, B). 
+ // vselect(A, and(B,C), B) => imm = baseImm = 49 + def : XXEvalPattern.opPat, vt:$vB), + baseImm>; + // vselect(A, nor(B,C), B) => imm = baseImm + 7 = 56 + def : XXEvalPattern.opPat, vt:$vB), + !add(baseImm, 7)>; + // vselect(A, eqv(B,C), B) => imm = baseImm + 8 = 57 + def : XXEvalPattern.opPat, vt:$vB), + !add(baseImm, 8)>; + // vselect(A, nand(B,C), B) => imm = baseImm + 13 = 62 + def : XXEvalPattern.opPat, vt:$vB), + !add(baseImm, 13)>; +} + +multiclass XXEvalVSelectWithXC baseImm>{ + // Multiclass for ternary patterns of the form vselect(A, X, C). + // vselect(A, and(B,C), C) => imm = baseImm = 81 + def : XXEvalPattern.opPat, vt:$vC), + baseImm>; + // vselect(A, nor(B,C), C) => imm = baseImm + 7 = 88 + def : XXEvalPattern.opPat, vt:$vC), + !add(baseImm, 7)>; + // vselect(A, eqv(B,C), C) => imm = baseImm + 8 = 89 + def : XXEvalPattern.opPat, vt:$vC), + !add(baseImm, 8)>; + // vselect(A, nand(B,C), C) => imm = baseImm + 13 = 94 + def : XXEvalPattern.opPat, vt:$vC), + !add(baseImm, 13)>; +} + +multiclass XXEvalVSelectWithXXor baseImm>{ + // Multiclass for ternary patterns of the form vselect(A, X, xor(B,C)). 
+ // vselect(A, and(B,C), xor(B,C)) => imm = baseImm = 97 + def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), + baseImm>; + // vselect(A, B, xor(B,C)) => imm = baseImm + 2 = 99 + def : XXEvalPattern.opPat), + !add(baseImm, 2)>; + // vselect(A, C, xor(B,C)) => imm = baseImm + 4 = 101 + def : XXEvalPattern.opPat), + !add(baseImm, 4)>; + // vselect(A, or(B,C), xor(B,C)) => imm = baseImm + 6 = 103 + def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), + !add(baseImm, 6)>; + // vselect(A, nor(B,C), xor(B,C)) => imm = baseImm + 7 = 104 + def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), + !add(baseImm, 7)>; +} + + // Pattern class using COPY_TO_REGCLASS for type casting +class XXEvalBitcastPattern imm> : + Pat<(vt pattern), + (COPY_TO_REGCLASS + (XXEVAL + (COPY_TO_REGCLASS vt:$vA, VSRC), + (COPY_TO_REGCLASS vt:$vB, VSRC), + (COPY_TO_REGCLASS vt:$vC, VSRC), + imm), + VRRC)>; + +multiclass XXEvalVSelectWithXAndCast baseImm> { + // Multiclass for ternary patterns using COPY_TO_REGCLASS for unsupported types + // vselect(A, xor(B,C), and(B,C)) => imm = baseImm = 22 + def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), + baseImm>; + // vselect(A, nor(B,C), and(B,C)) => imm = baseImm + 2 = 24 + def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), + !add(baseImm, 2)>; + // vselect(A, eqv(B,C), and(B,C)) => imm = baseImm + 3 = 25 + def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), + !add(baseImm, 3)>; + // vselect(A, not(C), and(B,C)) => imm = baseImm + 4 = 26 + def : XXEvalBitcastPattern.vnotC, XXEvalBinaryPattern.opPat), + !add(baseImm, 4)>; + // vselect(A, not(B), and(B,C)) => imm = baseImm + 6 = 28 + def : XXEvalBitcastPattern.vnotB, XXEvalBinaryPattern.opPat), + !add(baseImm, 6)>; +} + +multiclass XXEvalVSelectWithXBCast baseImm>{ + // vselect(A, and(B,C), B) => imm = baseImm = 49 + def : XXEvalBitcastPattern.opPat, vt:$vB), + baseImm>; + // vselect(A, nor(B,C), B) => imm = baseImm + 7 = 56 + def : XXEvalBitcastPattern.opPat, 
vt:$vB), + !add(baseImm, 7)>; + // vselect(A, eqv(B,C), B) => imm = baseImm + 8 = 57 + def : XXEvalBitcastPattern.opPat, vt:$vB), + !add(baseImm, 8)>; + // vselect(A, nand(B,C), B) => imm = baseImm + 13 = 62 + def : XXEvalBitcastPattern.opPat, vt:$vB), + !add(baseImm, 13)>; +} + +multiclass XXEvalVSelectWithXCCast baseImm>{ + // vselect(A, and(B,C), C) => imm = baseImm = 81 + def : XXEvalBitcastPattern.opPat, vt:$vC), + baseImm>; + // vselect(A, nor(B,C), C) => imm = baseImm + 7 = 88 + def : XXEvalBitcastPattern.opPat, vt:$vC), + !add(baseImm, 7)>; + // vselect(A, eqv(B,C), C) => imm = baseImm + 8 = 89 + def : XXEvalBitcastPattern.opPat, vt:$vC), + !add(baseImm, 8)>; + // vselect(A, nand(B,C), C) => imm = baseImm + 13 = 94 + def : XXEvalBitcastPattern.opPat, vt:$vC), + !add(baseImm, 13)>; +} + +multiclass XXEvalVSelectWithXXorCast baseImm>{ + // vselect(A, and(B,C), xor(B,C)) => imm = baseImm = 97 + def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), + baseImm>; + // vselect(A, B, xor(B,C)) => imm = baseImm + 2 = 99 + def : XXEvalBitcastPattern.opPat), + !add(baseImm, 2)>; + // vselect(A, C, xor(B,C)) => imm = baseImm + 4 = 101 + def : XXEvalBitcastPattern.opPat), + !add(baseImm, 4)>; + // vselect(A, or(B,C), xor(B,C)) => imm = baseImm + 6 = 103 + def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), + !add(baseImm, 6)>; + // vselect(A, nor(B,C), xor(B,C)) => imm = baseImm + 7 = 104 + def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), + !add(baseImm, 7)>; +} + +// Instantiate XXEval patterns for all vector types +let Predicates = [HasP10Vector] in { + let AddedComplexity = 500 in { + // For types directly supported by XXEVAL (v4i32, v2i64) + foreach type = [v4i32, v2i64] in { + defm : XXEvalVSelectWithXAnd; + defm : XXEvalVSelectWithXB; + defm : XXEvalVSelectWithXC; + defm : XXEvalVSelectWithXXor; + } + + // For types that need COPY_TO_REGCLASS (v8i16, v16i8) + foreach type = [v8i16, v16i8] in { + defm : XXEvalVSelectWithXAndCast; + 
defm : XXEvalVSelectWithXBCast; + defm : XXEvalVSelectWithXCCast; + defm : XXEvalVSelectWithXXorCast; + } + } +} let Predicates = [PrefixInstrs, HasP10Vector] in { let AddedComplexity = 400 in { @@ -2192,83 +2438,83 @@ let Predicates = [PrefixInstrs, HasP10Vector] in { // Anonymous patterns for XXEVAL // AND // and(A, B, C) - def : XXEvalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; + def : XXEvalPattern; // and(A, xor(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; + def : XXEvalPattern; // and(A, or(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; + def : XXEvalPattern; // and(A, nor(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>; + def : XXEvalPattern; // and(A, eqv(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>; + def : XXEvalPattern; // and(A, nand(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>; + def : XXEvalPattern; // NAND // nand(A, B, C) - def : XXEvalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern; // nand(A, xor(B, C)) - def : XXEvalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern; // nand(A, or(B, C)) - def : XXEvalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern; // nand(A, nor(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern; // nand(A, eqv(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern; // nand(A, nand(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern; // EQV // (eqv A, B, C) - def : XXEvalPattern<(or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern; // (eqv A, (and B, C)) - def : XXEvalPattern<(vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>; + def : XXEvalPattern; // (eqv A, (or B, C)) - def 
: XXEvalPattern<(vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>; + def : XXEvalPattern; // NOR // (nor A, B, C) - def : XXEvalPattern<(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>; + def : XXEvalPattern; // (nor A, (and B, C)) - def : XXEvalPattern<(vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>; + def : XXEvalPattern; // (nor A, (eqv B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>; + def : XXEvalPattern; // (nor A, (nand B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>; + def : XXEvalPattern; // (nor A, (nor B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>; + def : XXEvalPattern; // (nor A, (xor B, C)) - def : XXEvalPattern<(vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>; + def : XXEvalPattern; // OR // (or A, B, C) - def : XXEvalPattern<(or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>; + def : XXEvalPattern; // (or A, (and B, C)) - def : XXEvalPattern<(or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>; + def : XXEvalPattern; // (or A, (eqv B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>; + def : XXEvalPattern; // (or A, (nand B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>; + def : XXEvalPattern; // (or A, (nor B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>; + def : XXEvalPattern; // (or A, (xor B, C)) - def : XXEvalPattern<(or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>; + def : XXEvalPattern; // XOR // (xor A, B, C) - def : XXEvalPattern<(xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>; + def : XXEvalPattern; // (xor A, (and B, C)) - def : XXEvalPattern<(xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>; + def : XXEvalPattern; // (xor A, (or B, C)) - def : XXEvalPattern<(xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>; + def : XXEvalPattern; // Anonymous patterns to select prefixed VSX loads and stores. 
// Load / Store f128 diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll index 57d4c48a1aaa2..b41220b01373a 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; Test file to verify the emission of Vector Selection instructions when ternary operators are used. +; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_xor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_xor_BC_and_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 22 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_xor_BC_and_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 22 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -52,11 +48,9 @@ define <16 x i8> @ternary_A_xor_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_and_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, 
v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 22 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -70,11 +64,9 @@ define <8 x i16> @ternary_A_xor_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_xor_BC_and_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 22 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -88,11 +80,9 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -107,12 +97,10 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -127,11 +115,9 @@ define <16 x i8> @ternary_A_nor_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_and_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; 
CHECK-NEXT: xxeval v2, v2, v3, v4, 24 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -146,11 +132,9 @@ define <8 x i16> @ternary_A_nor_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -165,11 +149,9 @@ define <4 x i32> @ternary_A_eqv_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_eqv_BC_and_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 25 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -184,12 +166,10 @@ define <2 x i64> @ternary_A_eqv_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_eqv_BC_and_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 25 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -204,11 +184,9 @@ define <16 x i8> @ternary_A_eqv_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_eqv_BC_and_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 25 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ 
-223,11 +201,9 @@ define <8 x i16> @ternary_A_eqv_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_eqv_BC_and_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 25 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -242,11 +218,9 @@ define <4 x i32> @ternary_A_not_C_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_C_and_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 26 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, ; Vector not operation @@ -260,12 +234,10 @@ define <2 x i64> @ternary_A_not_C_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_C_and_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 26 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, ; Vector not operation @@ -279,11 +251,9 @@ define <16 x i8> @ternary_A_not_C_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_C_and_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 26 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, ; Vector not operation @@ -297,11 +267,9 @@ define <8 x i16> 
@ternary_A_not_C_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_C_and_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 26 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, ; Vector not operation @@ -315,11 +283,9 @@ define <4 x i32> @ternary_A_not_B_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_B_and_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 28 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %B, ; Vector not operation @@ -333,12 +299,10 @@ define <2 x i64> @ternary_A_not_B_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_B_and_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 28 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %B, ; Vector not operation @@ -352,11 +316,9 @@ define <16 x i8> @ternary_A_not_B_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_B_and_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 28 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %B, ; Vector not operation @@ -370,11 +332,9 @@ define <8 x i16> 
@ternary_A_not_B_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_B_and_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 28 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %B, ; Vector not operation diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll index c366fd5f0a8c2..8fd2453266706 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; Test file to verify the emission of Vector Selection instructions when ternary operators are used. +; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. 
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_and_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_and_BC_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 49 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -31,11 +30,10 @@ define <2 x i64> @ternary_A_and_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_and_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 49 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -48,10 +46,9 @@ define <16 x i8> @ternary_A_and_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_and_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 49 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -64,10 +61,9 @@ define <8 x i16> @ternary_A_and_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_and_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 49 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -80,10 +76,9 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; 
CHECK-LABEL: ternary_A_nor_BC_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -97,11 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_nor_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -115,10 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nor_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -132,10 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_nor_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -149,10 +141,9 @@ define <4 x i32> @ternary_A_eqv_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_eqv_BC_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 
57 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -166,11 +157,10 @@ define <2 x i64> @ternary_A_eqv_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_eqv_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 57 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -184,10 +174,9 @@ define <16 x i8> @ternary_A_eqv_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_eqv_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 57 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -201,10 +190,9 @@ define <8 x i16> @ternary_A_eqv_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_eqv_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 57 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -218,10 +206,9 @@ define <4 x i32> @ternary_A_nand_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> ; CHECK-LABEL: ternary_A_nand_BC_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 62 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -235,11 +222,10 @@ define <2 x i64> @ternary_A_nand_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> ; CHECK-LABEL: ternary_A_nand_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, 
v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 62 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -253,10 +239,9 @@ define <16 x i8> @ternary_A_nand_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nand_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 62 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -270,10 +255,9 @@ define <8 x i16> @ternary_A_nand_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> ; CHECK-LABEL: ternary_A_nand_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 62 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll index f70f1d093f069..c25288df78af6 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; Test file to verify the emission of Vector Selection instructions when ternary operators are used. +; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. 
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_and_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_and_BC_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 81 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -31,11 +30,10 @@ define <2 x i64> @ternary_A_and_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_and_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 81 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -48,10 +46,9 @@ define <16 x i8> @ternary_A_and_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_and_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 81 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -64,10 +61,9 @@ define <8 x i16> @ternary_A_and_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_and_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 81 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -80,10 +76,9 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; 
CHECK-LABEL: ternary_A_nor_BC_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -97,11 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_nor_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -115,10 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nor_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -132,10 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_nor_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -149,10 +141,9 @@ define <4 x i32> @ternary_A_eqv_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_eqv_BC_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 
89 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -166,11 +157,10 @@ define <2 x i64> @ternary_A_eqv_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_eqv_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 89 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -184,10 +174,9 @@ define <16 x i8> @ternary_A_eqv_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_eqv_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 89 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -201,10 +190,9 @@ define <8 x i16> @ternary_A_eqv_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_eqv_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 89 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -218,10 +206,9 @@ define <4 x i32> @ternary_A_nand_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> ; CHECK-LABEL: ternary_A_nand_BC_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 94 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -235,11 +222,10 @@ define <2 x i64> @ternary_A_nand_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> ; CHECK-LABEL: ternary_A_nand_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, 
v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 94 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -253,10 +239,9 @@ define <16 x i8> @ternary_A_nand_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nand_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 94 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -270,10 +255,9 @@ define <8 x i16> @ternary_A_nand_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> ; CHECK-LABEL: ternary_A_nand_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 94 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll index a3fdc905cb52c..0fc296cc5a4e2 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; Test file to verify the emission of Vector Selection instructions when ternary operators are used. +; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. 
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_and_BC_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 97 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_and_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_and_BC_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 97 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -52,11 +48,9 @@ define <16 x i8> @ternary_A_and_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 97 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -70,11 +64,9 @@ define <8 x i16> @ternary_A_and_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_and_BC_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 97 
; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -88,10 +80,9 @@ define <4 x i32> @ternary_A_B_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_B_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 99 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -104,11 +95,10 @@ define <2 x i64> @ternary_A_B_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_B_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 99 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -121,10 +111,9 @@ define <16 x i8> @ternary_A_B_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_B_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 99 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -137,10 +126,9 @@ define <8 x i16> @ternary_A_B_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_B_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 99 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -153,10 +141,9 @@ define <4 x i32> @ternary_A_C_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_C_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; 
CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 101 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -169,11 +156,10 @@ define <2 x i64> @ternary_A_C_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_C_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 101 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -186,10 +172,9 @@ define <16 x i8> @ternary_A_C_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_C_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 101 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -202,10 +187,9 @@ define <8 x i16> @ternary_A_C_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_C_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 101 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -218,11 +202,9 @@ define <4 x i32> @ternary_A_or_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 103 ; CHECK-NEXT: blr entry: %or = or <4 x i32> 
%B, %C @@ -236,12 +218,10 @@ define <2 x i64> @ternary_A_or_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 103 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -255,11 +235,9 @@ define <16 x i8> @ternary_A_or_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_or_BC_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 103 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -273,11 +251,9 @@ define <8 x i16> @ternary_A_or_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 103 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -291,11 +267,9 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -310,12 +284,10 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 
x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -330,11 +302,9 @@ define <16 x i8> @ternary_A_nor_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -349,11 +319,9 @@ define <8 x i16> @ternary_A_nor_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C From 7147072a2d76e817d7e814dd49e8db064db7ddb8 Mon Sep 17 00:00:00 2001 From: Tony Varghese Date: Sat, 12 Jul 2025 04:36:40 +0000 Subject: [PATCH 2/5] Updated llvm/lib/Target/PowerPC/PPCInstrP10.td to inline the BinaryOpDag class --- llvm/lib/Target/PowerPC/PPCInstrP10.td | 31 +++++++++----------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index aa31478589cea..19ef59cf50faa 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2164,22 +2164,6 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in 
{ class XXEvalPattern imm> : Pat<(vt pattern), (XXEVAL $vA, $vB, $vC, imm)> {} - // Helper class to generate binary operation DAGs for various vector types. - // For v4i32, emits (op B C). - // For other types, bitcasts operands to v4i32, applies the op, then bitcasts back. -class BinaryOpDag { - // The DAG for the binary operation. - dag OpDag = !if( !eq(vt, v4i32), - (op vt:$vB, vt:$vC), - (vt (bitconvert (op (v4i32 (bitconvert vt:$vB)), (v4i32 (bitconvert vt:$vC))))) - ); - // The DAG for the binary operation with a NOT applied to the result. - dag VnotOpDag = !if( !eq(vt, v4i32), - (vnot (op vt:$vB, vt:$vC)), - (vt (bitconvert (vnot (op (v4i32 (bitconvert vt:$vB)), (v4i32 (bitconvert vt:$vC)))))) - ); -} - // Helper class to generate unary NOT patterns for vector types. // For v4i32, emits (vnot B) or (vnot C). // For other types, bitcasts operand to v4i32, applies vnot, then bitcasts back. @@ -2197,10 +2181,15 @@ class XXEvalUnaryNotPattern { // Wrapper class for binary patterns with optional NOT on the result. // If 'not' is 0, emits the binary op; if 1, emits vnot of the binary op. class XXEvalBinaryPattern { - dag opPat = !if( !eq(not, 0), - BinaryOpDag.OpDag, - BinaryOpDag.VnotOpDag - ); + dag opPat = !if(!eq(not, 0), + // DAG for the binary operation. + !if(!eq(vt, v4i32), + (op vt:$vB, vt:$vC), + (vt (bitconvert (op (v4i32 (bitconvert vt:$vB)), (v4i32 (bitconvert vt:$vC)))))), + // DAG for the binary operation with a NOT applied to the result. 
+ !if(!eq(vt, v4i32), + (vnot (op vt:$vB, vt:$vC)), + (vt (bitconvert (vnot (op (v4i32 (bitconvert vt:$vB)), (v4i32 (bitconvert vt:$vC)))))))); } multiclass XXEvalVSelectWithXAnd baseImm> { @@ -2389,7 +2378,7 @@ multiclass XXEvalVSelectWithXXorCast baseImm>{ // Instantiate XXEval patterns for all vector types let Predicates = [HasP10Vector] in { - let AddedComplexity = 500 in { + let AddedComplexity = 400 in { // For types directly supported by XXEVAL (v4i32, v2i64) foreach type = [v4i32, v2i64] in { defm : XXEvalVSelectWithXAnd; From ffbbf9dde3a214e6b8a951f25e25343e2580e854 Mon Sep 17 00:00:00 2001 From: Tony Varghese Date: Mon, 21 Jul 2025 13:53:52 +0000 Subject: [PATCH 3/5] Support only the vselect(A, X, and(B,C)) Operations --- llvm/lib/Target/PowerPC/PPCInstrP10.td | 403 ++++++++---------- .../CodeGen/PowerPC/xxeval-vselect-x-b.ll | 50 ++- .../CodeGen/PowerPC/xxeval-vselect-x-c.ll | 50 ++- .../CodeGen/PowerPC/xxeval-vselect-x-xor.ll | 74 +++- 4 files changed, 290 insertions(+), 287 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 19ef59cf50faa..4e314649ff192 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2159,242 +2159,170 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { (COPY_TO_REGCLASS $VRB, VSRC), 2)))>; } - // Defines a pattern for the XXEVAL instruction with a specific value type, - // pattern, and immediate. -class XXEvalPattern imm> : - Pat<(vt pattern), (XXEVAL $vA, $vB, $vC, imm)> {} - - // Helper class to generate unary NOT patterns for vector types. - // For v4i32, emits (vnot B) or (vnot C). - // For other types, bitcasts operand to v4i32, applies vnot, then bitcasts back. 
-class XXEvalUnaryNotPattern { - dag vnotB = !if( !eq(vt, v4i32), - (vnot vt:$vB), - (vt (bitconvert (vnot (v4i32 (bitconvert vt:$vB))))) - ); - dag vnotC = !if( !eq(vt, v4i32), - (vnot vt:$vC), - (vt (bitconvert (vnot (v4i32 (bitconvert vt:$vC))))) - ); -} - - // Wrapper class for binary patterns with optional NOT on the result. - // If 'not' is 0, emits the binary op; if 1, emits vnot of the binary op. -class XXEvalBinaryPattern { - dag opPat = !if(!eq(not, 0), - // DAG for the binary operation. - !if(!eq(vt, v4i32), - (op vt:$vB, vt:$vC), - (vt (bitconvert (op (v4i32 (bitconvert vt:$vB)), (v4i32 (bitconvert vt:$vC)))))), - // DAG for the binary operation with a NOT applied to the result. - !if(!eq(vt, v4i32), - (vnot (op vt:$vB, vt:$vC)), - (vt (bitconvert (vnot (op (v4i32 (bitconvert vt:$vB)), (v4i32 (bitconvert vt:$vC)))))))); -} - -multiclass XXEvalVSelectWithXAnd baseImm> { - // Multiclass for ternary patterns of the form vselect(A, X, and(B, C)). - // vselect(A, xor(B,C), and(B,C)) => imm = baseImm = 22 - def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), - baseImm>; - // vselect(A, nor(B,C), and(B,C)) => imm = baseImm + 2 = 24 - def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), - !add(baseImm, 2)>; - // vselect(A, eqv(B,C), and(B,C)) => imm = baseImm + 3 = 25 - def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), - !add(baseImm, 3)>; - // vselect(A, not(C), and(B,C)) => imm = baseImm + 4 = 26 - def : XXEvalPattern.vnotC, XXEvalBinaryPattern.opPat), - !add(baseImm, 4)>; - // vselect(A, not(B), and(B,C)) => imm = baseImm + 6 = 28 - def : XXEvalPattern.vnotB, XXEvalBinaryPattern.opPat), - !add(baseImm, 6)>; -} - -multiclass XXEvalVSelectWithXB baseImm>{ - // Multiclass for ternary patterns of the form vselect(A, X, B). 
- // vselect(A, and(B,C), B) => imm = baseImm = 49 - def : XXEvalPattern.opPat, vt:$vB), - baseImm>; - // vselect(A, nor(B,C), B) => imm = baseImm + 7 = 56 - def : XXEvalPattern.opPat, vt:$vB), - !add(baseImm, 7)>; - // vselect(A, eqv(B,C), B) => imm = baseImm + 8 = 57 - def : XXEvalPattern.opPat, vt:$vB), - !add(baseImm, 8)>; - // vselect(A, nand(B,C), B) => imm = baseImm + 13 = 62 - def : XXEvalPattern.opPat, vt:$vB), - !add(baseImm, 13)>; -} - -multiclass XXEvalVSelectWithXC baseImm>{ - // Multiclass for ternary patterns of the form vselect(A, X, C). - // vselect(A, and(B,C), C) => imm = baseImm = 81 - def : XXEvalPattern.opPat, vt:$vC), - baseImm>; - // vselect(A, nor(B,C), C) => imm = baseImm + 7 = 88 - def : XXEvalPattern.opPat, vt:$vC), - !add(baseImm, 7)>; - // vselect(A, eqv(B,C), C) => imm = baseImm + 8 = 89 - def : XXEvalPattern.opPat, vt:$vC), - !add(baseImm, 8)>; - // vselect(A, nand(B,C), C) => imm = baseImm + 13 = 94 - def : XXEvalPattern.opPat, vt:$vC), - !add(baseImm, 13)>; -} - -multiclass XXEvalVSelectWithXXor baseImm>{ - // Multiclass for ternary patterns of the form vselect(A, X, xor(B,C)). 
- // vselect(A, and(B,C), xor(B,C)) => imm = baseImm = 97 - def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), - baseImm>; - // vselect(A, B, xor(B,C)) => imm = baseImm + 2 = 99 - def : XXEvalPattern.opPat), - !add(baseImm, 2)>; - // vselect(A, C, xor(B,C)) => imm = baseImm + 4 = 101 - def : XXEvalPattern.opPat), - !add(baseImm, 4)>; - // vselect(A, or(B,C), xor(B,C)) => imm = baseImm + 6 = 103 - def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), - !add(baseImm, 6)>; - // vselect(A, nor(B,C), xor(B,C)) => imm = baseImm + 7 = 104 - def : XXEvalPattern.opPat, XXEvalBinaryPattern.opPat), - !add(baseImm, 7)>; -} - - // Pattern class using COPY_TO_REGCLASS for type casting -class XXEvalBitcastPattern imm> : - Pat<(vt pattern), + // ============================================================================= + // XXEVAL Instruction Pattern Definitions + // ============================================================================= + // + // The XXEVAL instruction can perform ternary equivalent operations + // where the equivalent function is determined by an 8-bit immediate value. + // XXEVAL has the form: xxeval XT,XA,XB,XC,IMM + // Equivalent function A?xor(B,C):and(B,C) is performed if the IMM value is 22. + // + // REGISTER CLASS CONSTRAINTS: + // - XXEVAL natively supports: VSRC register class [v4i32, v4f32, v2f64, v2i64] + // - Other vector types [v16i8, v8i16] require COPY_TO_REGCLASS to/from VRRC + // + // PATTERN STRATEGY: + // - XXEvalPattern: Direct patterns for VSRC-supported types + // - XXEvalVRRC: Patterns with register class conversion for VRRC types + // ============================================================================= + + // Defines a pattern for XXEVAL instruction with native VSRC register class support. + // Used for types that XXEVAL directly supports without register class conversion. 
+class XXEvalPattern Imm> : + Pat<(Vt Pattern), (XXEVAL $vA, $vB, $vC, Imm)> {} + + // Defines a pattern for XXEVAL instruction requiring VRRC→VSRC register class conversion. + // Used for vector types not natively supported by XXEVAL (v16i8, v8i16). + // Wraps inputs/outputs with COPY_TO_REGCLASS to handle register class mismatch. +class XXEvalVRRC Imm> : + Pat<(Vt Pattern), (COPY_TO_REGCLASS (XXEVAL - (COPY_TO_REGCLASS vt:$vA, VSRC), - (COPY_TO_REGCLASS vt:$vB, VSRC), - (COPY_TO_REGCLASS vt:$vC, VSRC), - imm), + (COPY_TO_REGCLASS Vt:$vA, VSRC), + (COPY_TO_REGCLASS Vt:$vB, VSRC), + (COPY_TO_REGCLASS Vt:$vC, VSRC), + Imm), VRRC)>; -multiclass XXEvalVSelectWithXAndCast baseImm> { - // Multiclass for ternary patterns using COPY_TO_REGCLASS for unsupported types - // vselect(A, xor(B,C), and(B,C)) => imm = baseImm = 22 - def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), - baseImm>; - // vselect(A, nor(B,C), and(B,C)) => imm = baseImm + 2 = 24 - def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), - !add(baseImm, 2)>; - // vselect(A, eqv(B,C), and(B,C)) => imm = baseImm + 3 = 25 - def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), - !add(baseImm, 3)>; - // vselect(A, not(C), and(B,C)) => imm = baseImm + 4 = 26 - def : XXEvalBitcastPattern.vnotC, XXEvalBinaryPattern.opPat), - !add(baseImm, 4)>; - // vselect(A, not(B), and(B,C)) => imm = baseImm + 6 = 28 - def : XXEvalBitcastPattern.vnotB, XXEvalBinaryPattern.opPat), - !add(baseImm, 6)>; -} - -multiclass XXEvalVSelectWithXBCast baseImm>{ - // vselect(A, and(B,C), B) => imm = baseImm = 49 - def : XXEvalBitcastPattern.opPat, vt:$vB), - baseImm>; - // vselect(A, nor(B,C), B) => imm = baseImm + 7 = 56 - def : XXEvalBitcastPattern.opPat, vt:$vB), - !add(baseImm, 7)>; - // vselect(A, eqv(B,C), B) => imm = baseImm + 8 = 57 - def : XXEvalBitcastPattern.opPat, vt:$vB), - !add(baseImm, 8)>; - // vselect(A, nand(B,C), B) => imm = baseImm + 13 = 62 - def : XXEvalBitcastPattern.opPat, vt:$vB), - 
!add(baseImm, 13)>; -} - -multiclass XXEvalVSelectWithXCCast baseImm>{ - // vselect(A, and(B,C), C) => imm = baseImm = 81 - def : XXEvalBitcastPattern.opPat, vt:$vC), - baseImm>; - // vselect(A, nor(B,C), C) => imm = baseImm + 7 = 88 - def : XXEvalBitcastPattern.opPat, vt:$vC), - !add(baseImm, 7)>; - // vselect(A, eqv(B,C), C) => imm = baseImm + 8 = 89 - def : XXEvalBitcastPattern.opPat, vt:$vC), - !add(baseImm, 8)>; - // vselect(A, nand(B,C), C) => imm = baseImm + 13 = 94 - def : XXEvalBitcastPattern.opPat, vt:$vC), - !add(baseImm, 13)>; -} - -multiclass XXEvalVSelectWithXXorCast baseImm>{ - // vselect(A, and(B,C), xor(B,C)) => imm = baseImm = 97 - def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), - baseImm>; - // vselect(A, B, xor(B,C)) => imm = baseImm + 2 = 99 - def : XXEvalBitcastPattern.opPat), - !add(baseImm, 2)>; - // vselect(A, C, xor(B,C)) => imm = baseImm + 4 = 101 - def : XXEvalBitcastPattern.opPat), - !add(baseImm, 4)>; - // vselect(A, or(B,C), xor(B,C)) => imm = baseImm + 6 = 103 - def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), - !add(baseImm, 6)>; - // vselect(A, nor(B,C), xor(B,C)) => imm = baseImm + 7 = 104 - def : XXEvalBitcastPattern.opPat, XXEvalBinaryPattern.opPat), - !add(baseImm, 7)>; -} - -// Instantiate XXEval patterns for all vector types -let Predicates = [HasP10Vector] in { - let AddedComplexity = 400 in { - // For types directly supported by XXEVAL (v4i32, v2i64) - foreach type = [v4i32, v2i64] in { - defm : XXEvalVSelectWithXAnd; - defm : XXEvalVSelectWithXB; - defm : XXEvalVSelectWithXC; - defm : XXEvalVSelectWithXXor; - } - - // For types that need COPY_TO_REGCLASS (v8i16, v16i8) - foreach type = [v8i16, v16i8] in { - defm : XXEvalVSelectWithXAndCast; - defm : XXEvalVSelectWithXBCast; - defm : XXEvalVSelectWithXCCast; - defm : XXEvalVSelectWithXXorCast; - } - } + // ============================================================================= + // Helper Classes for Type-Aware Operation Generation + // 
============================================================================= + // + // These helpers abstract the complexity of handling both XXEVAL-native types + // (v4i32, v2i64) and non-native types (v8i16, v16i8) that require bitcasting. + // + // BITCASTING STRATEGY: + // - For v4i32: Use operation directly (no bitcast needed) + // - For other types: bitcast → v4i32 → operation → bitcast back to original type + // ============================================================================= + + // Generates bitcast-aware unary NOT operations for any vector type. + // Handles the type conversion complexity transparently. + // + // USAGE: XXEvalNot.B generates vnot for $vB operand of type v8i16 + // XXEvalNot.C generates vnot for $vC operand of type v8i16 +class XXEvalNot { + // NOT operation on $vB operand, with type-appropriate bitcasting + dag B = !if(!eq(Vt, v4i32), + (vnot Vt:$vB), // Direct: v4i32 native + (Vt (bitconvert (vnot (v4i32 (bitconvert Vt:$vB)))))); // Bitcast: other types + + // NOT operation on $vC operand, with type-appropriate bitcasting + dag C = !if(!eq(Vt, v4i32), + (vnot Vt:$vC), // Direct: v4i32 native + (Vt (bitconvert (vnot (v4i32 (bitconvert Vt:$vC)))))); // Bitcast: other types +} + + // Generates bitcast-aware binary operations (and, or, xor) for any vector type. + // Supports optional logical inversion of the result (for NOR, EQV operations). 
+ // + // PARAMETERS: + // Vt: Vector type (v4i32, v8i16, v16i8, v2i64) + // Op: Binary operation (and, or, xor) + // Negate: 0=direct operation, 1=NOT(operation) for NOR/EQV patterns + // + // USAGE: XXEvalBinOp.pattern // XOR of two v8i16 operands + // XXEvalBinOp.pattern // NOR of two v8i16 operands (or + not) +class XXEvalBinOp { + dag pattern = !if(!eq(Negate, 0), + // Direct binary operation (and, or, xor) + !if(!eq(Vt, v4i32), + (Op Vt:$vB, Vt:$vC), // Direct: v4i32 native + (Vt (bitconvert (Op (v4i32 (bitconvert Vt:$vB)), // Bitcast: other types + (v4i32 (bitconvert Vt:$vC)))))), + // Inverted binary operation (nor, eqv) + !if(!eq(Vt, v4i32), + (vnot (Op Vt:$vB, Vt:$vC)), // Direct: v4i32 native + (Vt (bitconvert (vnot (Op (v4i32 (bitconvert Vt:$vB)), // Bitcast: other types + (v4i32 (bitconvert Vt:$vC)))))))); +} + + // ============================================================================= + // XXEVAL Ternary Pattern Multiclasses + // ============================================================================= + // + // These multiclasses generate patterns for XXEVAL instructions that implement + // complex ternary boolean functions of the form: vselect(A, f(B,C), g(B,C)) + // + // The specific immediate values correspond to PowerPC XXEVAL instruction + // encodings for various boolean functions. + // ============================================================================= + + // Generates XXEVAL patterns for types with native VSRC register class support. 
+ // Implements: vselect(A, , and(B,C)) + // + // SUPPORTED TYPES: v4i32, v2i64, v4f32, v2f64 (VSRC register class) + // IMMEDIATE ENCODING: BaseImm + offset determines the boolean function +multiclass XXEvalXAnd BaseImm> { + // vselect(A, xor(B,C), and(B,C)) => Imm Value 22 + def : XXEvalPattern.pattern, XXEvalBinOp.pattern), + BaseImm>; + + // vselect(A, nor(B,C), and(B,C)) => Imm Value 24 + def : XXEvalPattern.pattern, XXEvalBinOp.pattern), + !add(BaseImm, 2)>; + + // vselect(A, eqv(B,C), and(B,C)) => Imm Value 25 + // EQV = NOT(XOR) = equivalence operation + def : XXEvalPattern.pattern, XXEvalBinOp.pattern), + !add(BaseImm, 3)>; + + // vselect(A, not(C), and(B,C)) => Imm Value 26 + def : XXEvalPattern.C, XXEvalBinOp.pattern), + !add(BaseImm, 4)>; + + // vselect(A, not(B), and(B,C)) => Imm Value 28 + def : XXEvalPattern.B, XXEvalBinOp.pattern), + !add(BaseImm, 6)>; +} + + // Generates XXEVAL patterns for types requiring VRRC register class conversion. + // Identical boolean functions to XXEvalXAnd, but with register class handling. 
+ // + // SUPPORTED TYPES: v8i16, v16i8 (VRRC register class) + // REGISTER CONVERSION: VRRC → VSRC → XXEVAL → VRRC + // IMMEDIATE ENCODING: Same as XXEvalXAnd (BaseImm + offset) +multiclass XXEvalXAndVRRC BaseImm> { + // vselect(A, xor(B,C), and(B,C)) => Imm Value 22 + def : XXEvalVRRC.pattern, XXEvalBinOp.pattern), + BaseImm>; + + // vselect(A, nor(B,C), and(B,C)) => Imm Value 24 + def : XXEvalVRRC.pattern, XXEvalBinOp.pattern), + !add(BaseImm, 2)>; + + // vselect(A, eqv(B,C), and(B,C)) => Imm Value 25 + def : XXEvalVRRC.pattern, XXEvalBinOp.pattern), + !add(BaseImm, 3)>; + + // vselect(A, not(C), and(B,C)) => Imm Value 26 + def : XXEvalVRRC.C, XXEvalBinOp.pattern), + !add(BaseImm, 4)>; + + // vselect(A, not(B), and(B,C)) => Imm Value 28 + def : XXEvalVRRC.B, XXEvalBinOp.pattern), + !add(BaseImm, 6)>; } let Predicates = [PrefixInstrs, HasP10Vector] in { @@ -2505,6 +2433,17 @@ let Predicates = [PrefixInstrs, HasP10Vector] in { // (xor A, (or B, C)) def : XXEvalPattern; + // Add XXEval Patterns for ternary Operations. + // For VSRC-native types (direct XXEVAL support) + foreach Ty = [v4i32, v2i64] in { + defm : XXEvalXAnd; + } + + // For VRRC types (requiring register class conversion) + foreach Ty = [v8i16, v16i8] in { + defm : XXEvalXAndVRRC; + } + // Anonymous patterns to select prefixed VSX loads and stores. // Load / Store f128 def : Pat<(f128 (load PDForm:$src)), diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll index 8fd2453266706..c366fd5f0a8c2 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. +; Test file to verify the emission of Vector Selection instructions when ternary operators are used. 
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -15,9 +15,10 @@ define <4 x i32> @ternary_A_and_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_and_BC_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 49 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -30,10 +31,11 @@ define <2 x i64> @ternary_A_and_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_and_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 49 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -46,9 +48,10 @@ define <16 x i8> @ternary_A_and_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_and_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 49 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -61,9 +64,10 @@ define <8 x i16> @ternary_A_and_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_and_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 49 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -76,9 +80,10 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; 
CHECK-LABEL: ternary_A_nor_BC_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -92,10 +97,11 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_nor_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -109,9 +115,10 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nor_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -125,9 +132,10 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_nor_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -141,9 +149,10 @@ define <4 x i32> @ternary_A_eqv_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_eqv_BC_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 57 +; CHECK-NEXT: xxsel v2, v3, vs0, 
v2 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -157,10 +166,11 @@ define <2 x i64> @ternary_A_eqv_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_eqv_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 57 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -174,9 +184,10 @@ define <16 x i8> @ternary_A_eqv_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_eqv_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 57 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -190,9 +201,10 @@ define <8 x i16> @ternary_A_eqv_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_eqv_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 57 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -206,9 +218,10 @@ define <4 x i32> @ternary_A_nand_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> ; CHECK-LABEL: ternary_A_nand_BC_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 62 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -222,10 +235,11 @@ define <2 x i64> @ternary_A_nand_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> ; CHECK-LABEL: ternary_A_nand_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, 
v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 62 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -239,9 +253,10 @@ define <16 x i8> @ternary_A_nand_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nand_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 62 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -255,9 +270,10 @@ define <8 x i16> @ternary_A_nand_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> ; CHECK-LABEL: ternary_A_nand_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 62 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll index c25288df78af6..f70f1d093f069 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. +; Test file to verify the emission of Vector Selection instructions when ternary operators are used. 
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -15,9 +15,10 @@ define <4 x i32> @ternary_A_and_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_and_BC_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 81 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -30,10 +31,11 @@ define <2 x i64> @ternary_A_and_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_and_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 81 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -46,9 +48,10 @@ define <16 x i8> @ternary_A_and_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_and_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 81 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -61,9 +64,10 @@ define <8 x i16> @ternary_A_and_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_and_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 81 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -76,9 +80,10 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; 
CHECK-LABEL: ternary_A_nor_BC_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -92,10 +97,11 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_nor_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -109,9 +115,10 @@ define <16 x i8> @ternary_A_nor_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nor_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -125,9 +132,10 @@ define <8 x i16> @ternary_A_nor_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_nor_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -141,9 +149,10 @@ define <4 x i32> @ternary_A_eqv_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_eqv_BC_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 89 +; CHECK-NEXT: xxsel v2, v4, vs0, 
v2 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -157,10 +166,11 @@ define <2 x i64> @ternary_A_eqv_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_eqv_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 89 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -174,9 +184,10 @@ define <16 x i8> @ternary_A_eqv_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_eqv_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 89 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -190,9 +201,10 @@ define <8 x i16> @ternary_A_eqv_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_eqv_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 89 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -206,9 +218,10 @@ define <4 x i32> @ternary_A_nand_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> ; CHECK-LABEL: ternary_A_nand_BC_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 94 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -222,10 +235,11 @@ define <2 x i64> @ternary_A_nand_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> ; CHECK-LABEL: ternary_A_nand_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, 
v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 94 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -239,9 +253,10 @@ define <16 x i8> @ternary_A_nand_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nand_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 94 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -255,9 +270,10 @@ define <8 x i16> @ternary_A_nand_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> ; CHECK-LABEL: ternary_A_nand_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 94 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll index 0fc296cc5a4e2..a3fdc905cb52c 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used. +; Test file to verify the emission of Vector Selection instructions when ternary operators are used. 
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -15,9 +15,11 @@ define <4 x i32> @ternary_A_and_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_and_BC_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 97 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -31,10 +33,12 @@ define <2 x i64> @ternary_A_and_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_and_BC_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 97 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -48,9 +52,11 @@ define <16 x i8> @ternary_A_and_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 97 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -64,9 +70,11 @@ define <8 x i16> @ternary_A_and_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_and_BC_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 97 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 
; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -80,9 +88,10 @@ define <4 x i32> @ternary_A_B_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_B_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 99 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -95,10 +104,11 @@ define <2 x i64> @ternary_A_B_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_B_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 99 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -111,9 +121,10 @@ define <16 x i8> @ternary_A_B_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_B_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 99 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -126,9 +137,10 @@ define <8 x i16> @ternary_A_B_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_B_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 99 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -141,9 +153,10 @@ define <4 x i32> @ternary_A_C_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_C_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; 
CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 101 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -156,10 +169,11 @@ define <2 x i64> @ternary_A_C_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_C_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 101 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -172,9 +186,10 @@ define <16 x i8> @ternary_A_C_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_C_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 101 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -187,9 +202,10 @@ define <8 x i16> @ternary_A_C_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_C_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 101 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -202,9 +218,11 @@ define <4 x i32> @ternary_A_or_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 103 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> 
%B, %C @@ -218,10 +236,12 @@ define <2 x i64> @ternary_A_or_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 103 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -235,9 +255,11 @@ define <16 x i8> @ternary_A_or_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_or_BC_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 103 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -251,9 +273,11 @@ define <8 x i16> @ternary_A_or_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlor vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 103 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -267,9 +291,11 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -284,10 +310,12 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 
x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -302,9 +330,11 @@ define <16 x i8> @ternary_A_nor_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -319,9 +349,11 @@ define <8 x i16> @ternary_A_nor_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxlnor vs0, v3, v4 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C From 47d76ba7b0f4cdaee47f61717654e0496add3ad3 Mon Sep 17 00:00:00 2001 From: Tony Varghese Date: Mon, 21 Jul 2025 13:53:52 +0000 Subject: [PATCH 4/5] Support only the vselect(A, X, and(B,C)) Operations --- llvm/lib/Target/PowerPC/PPCInstrP10.td | 85 +++++++++++++------------- 1 file changed, 41 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 4e314649ff192..898a245f76e91 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2204,15 +2204,16 @@ class XXEvalVRRC Imm> : // // BITCASTING STRATEGY: // - For v4i32: Use operation 
directly (no bitcast needed) - // - For other types: bitcast → v4i32 → operation → bitcast back to original type + // - For other types: Bit operations happens in v4i32 and requires bitcasting. + // bitcast → v4i32 → operation → bitcast back to original type // ============================================================================= // Generates bitcast-aware unary NOT operations for any vector type. // Handles the type conversion complexity transparently. // - // USAGE: XXEvalNot.B generates vnot for $vB operand of type v8i16 - // XXEvalNot.C generates vnot for $vC operand of type v8i16 -class XXEvalNot { + // USAGE: Not.B generates vnot for $vB operand of type v8i16 + // Not.C generates vnot for $vC operand of type v8i16 +class Not { // NOT operation on $vB operand, with type-appropriate bitcasting dag B = !if(!eq(Vt, v4i32), (vnot Vt:$vB), // Direct: v4i32 native @@ -2232,97 +2233,93 @@ class XXEvalNot { // Op: Binary operation (and, or, xor) // Negate: 0=direct operation, 1=NOT(operation) for NOR/EQV patterns // - // USAGE: XXEvalBinOp.pattern // XOR of two v8i16 operands - // XXEvalBinOp.pattern // NOR of two v8i16 operands (or + not) + // USAGE: XXEvalBinOp.BC // XOR of two v8i16 operands + // XXEvalBinOp.BC // NOR of two v8i16 operands (or + not) class XXEvalBinOp { - dag pattern = !if(!eq(Negate, 0), + dag BC = !if(!eq(Negate, 0), // Direct binary operation (and, or, xor) !if(!eq(Vt, v4i32), (Op Vt:$vB, Vt:$vC), // Direct: v4i32 native (Vt (bitconvert (Op (v4i32 (bitconvert Vt:$vB)), // Bitcast: other types - (v4i32 (bitconvert Vt:$vC)))))), + (v4i32 (bitconvert Vt:$vC)))))), // Inverted binary operation (nor, eqv) !if(!eq(Vt, v4i32), (vnot (Op Vt:$vB, Vt:$vC)), // Direct: v4i32 native (Vt (bitconvert (vnot (Op (v4i32 (bitconvert Vt:$vB)), // Bitcast: other types - (v4i32 (bitconvert Vt:$vC)))))))); + (v4i32 (bitconvert Vt:$vC)))))))); } + // Pattern class for common binary bit operations +class And : XXEvalBinOp; +class Or : XXEvalBinOp; +class Xor : 
XXEvalBinOp; +class Nor : XXEvalBinOp; // or + invert +class Eqv : XXEvalBinOp; // xor + invert // ============================================================================= // XXEVAL Ternary Pattern Multiclasses // ============================================================================= // // These multiclasses generate patterns for XXEVAL instructions that implement - // complex ternary boolean functions of the form: vselect(A, f(B,C), g(B,C)) + // complex ternary equivalent operations of the form: vselect(A, f(B,C), g(B,C)) // // The specific immediate values correspond to PowerPC XXEVAL instruction - // encodings for various boolean functions. + // encodings for various ternary equivalent operations. // ============================================================================= // Generates XXEVAL patterns for types with native VSRC register class support. // Implements: vselect(A, , and(B,C)) // - // SUPPORTED TYPES: v4i32, v2i64, v4f32, v2f64 (VSRC register class) - // IMMEDIATE ENCODING: BaseImm + offset determines the boolean function + // SUPPORTED TYPES: v4i32, v2i64 (VSRC register class) + // IMMEDIATE ENCODING: BaseImm + offset determines the ternary equivalent operation multiclass XXEvalXAnd BaseImm> { - // vselect(A, xor(B,C), and(B,C)) => Imm Value 22 + // vselect(A, xor(B,C), and(B,C)) : xxeval Imm Value is 22 def : XXEvalPattern.pattern, XXEvalBinOp.pattern), - BaseImm>; + (vselect Vt:$vA, Xor.BC, And.BC), BaseImm>; - // vselect(A, nor(B,C), and(B,C)) => Imm Value 24 + // vselect(A, nor(B,C), and(B,C)) : xxeval Imm Value is 24 def : XXEvalPattern.pattern, XXEvalBinOp.pattern), - !add(BaseImm, 2)>; + (vselect Vt:$vA, Nor.BC, And.BC), !add(BaseImm, 2)>; - // vselect(A, eqv(B,C), and(B,C)) => Imm Value 25 + // vselect(A, eqv(B,C), and(B,C)) : xxeval Imm Value is 25 // EQV = NOT(XOR) = equivalence operation def : XXEvalPattern.pattern, XXEvalBinOp.pattern), - !add(BaseImm, 3)>; + (vselect Vt:$vA, Eqv.BC, And.BC), !add(BaseImm, 3)>; - // 
vselect(A, not(C), and(B,C)) => Imm Value 26 + // vselect(A, not(C), and(B,C)) : xxeval Imm Value is 26 def : XXEvalPattern.C, XXEvalBinOp.pattern), - !add(BaseImm, 4)>; + (vselect Vt:$vA, Not.C, And.BC), !add(BaseImm, 4)>; - // vselect(A, not(B), and(B,C)) => Imm Value 28 + // vselect(A, not(B), and(B,C)) : xxeval Imm Value is 28 def : XXEvalPattern.B, XXEvalBinOp.pattern), - !add(BaseImm, 6)>; + (vselect Vt:$vA, Not.B, And.BC), !add(BaseImm, 6)>; } // Generates XXEVAL patterns for types requiring VRRC register class conversion. - // Identical boolean functions to XXEvalXAnd, but with register class handling. + // Identical equivalent operations to XXEvalXAnd, but with register class handling. // // SUPPORTED TYPES: v8i16, v16i8 (VRRC register class) // REGISTER CONVERSION: VRRC → VSRC → XXEVAL → VRRC // IMMEDIATE ENCODING: Same as XXEvalXAnd (BaseImm + offset) multiclass XXEvalXAndVRRC BaseImm> { - // vselect(A, xor(B,C), and(B,C)) => Imm Value 22 + // vselect(A, xor(B,C), and(B,C)) : xxeval Imm Value is 22 def : XXEvalVRRC.pattern, XXEvalBinOp.pattern), - BaseImm>; + (vselect Vt:$vA, Xor.BC, And.BC), BaseImm>; - // vselect(A, nor(B,C), and(B,C)) => Imm Value 24 + // vselect(A, nor(B,C), and(B,C)) : xxeval Imm Value is 24 def : XXEvalVRRC.pattern, XXEvalBinOp.pattern), - !add(BaseImm, 2)>; + (vselect Vt:$vA, Nor.BC, And.BC), !add(BaseImm, 2)>; - // vselect(A, eqv(B,C), and(B,C)) => Imm Value 25 + // vselect(A, eqv(B,C), and(B,C)) : xxeval Imm Value is 25 def : XXEvalVRRC.pattern, XXEvalBinOp.pattern), - !add(BaseImm, 3)>; + (vselect Vt:$vA, Eqv.BC, And.BC), !add(BaseImm, 3)>; - // vselect(A, not(C), and(B,C)) => Imm Value 26 + // vselect(A, not(C), and(B,C)) : xxeval Imm Value is 26 def : XXEvalVRRC.C, XXEvalBinOp.pattern), - !add(BaseImm, 4)>; + (vselect Vt:$vA, Not.C, And.BC), !add(BaseImm, 4)>; - // vselect(A, not(B), and(B,C)) => Imm Value 28 + // vselect(A, not(B), and(B,C)) : xxeval Imm Value is 28 def : XXEvalVRRC.B, XXEvalBinOp.pattern), - !add(BaseImm, 
6)>; + (vselect Vt:$vA, Not.B, And.BC), !add(BaseImm, 6)>; } let Predicates = [PrefixInstrs, HasP10Vector] in { From 3e7895b91e90a14421ee561a05b249634bb823dd Mon Sep 17 00:00:00 2001 From: Tony Varghese Date: Mon, 21 Jul 2025 17:41:15 +0000 Subject: [PATCH 5/5] Unified VRRC and VSRC handling, hardcorded the imm values --- llvm/lib/Target/PowerPC/PPCInstrP10.td | 254 +++++++++++-------------- 1 file changed, 109 insertions(+), 145 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 898a245f76e91..39165f9122ea1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2163,9 +2163,10 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { // XXEVAL Instruction Pattern Definitions // ============================================================================= // - // The XXEVAL instruction can perform ternary equivalent operations - // where the equivalent function is determined by an 8-bit immediate value. - // XXEVAL has the form: xxeval XT,XA,XB,XC,IMM + // XXEVAL instruction performs 256 different logical operations on three vector + // operands using an 8-bit immediate value to select the operation. + // Format: xxeval XT, XA, XB, XC, IMM + // For example: // Equivalent function A?xor(B,C):and(B,C) is performed if the IMM value is 22. // // REGISTER CLASS CONSTRAINTS: @@ -2173,33 +2174,27 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { // - Other vector types [v16i8, v8i16] require COPY_TO_REGCLASS to/from VRRC // // PATTERN STRATEGY: - // - XXEvalPattern: Direct patterns for VSRC-supported types - // - XXEvalVRRC: Patterns with register class conversion for VRRC types + // - XXEvalPattern: Class that automatically handles VSRC/VRRC conversion. // ============================================================================= - // Defines a pattern for XXEVAL instruction with native VSRC register class support. 
- // Used for types that XXEVAL directly supports without register class conversion. -class XXEvalPattern Imm> : - Pat<(Vt Pattern), (XXEVAL $vA, $vB, $vC, Imm)> {} - - // Defines a pattern for XXEVAL instruction requiring VRRC→VSRC register class conversion. - // Used for vector types not natively supported by XXEVAL (v16i8, v8i16). - // Wraps inputs/outputs with COPY_TO_REGCLASS to handle register class mismatch. -class XXEvalVRRC Imm> : - Pat<(Vt Pattern), - (COPY_TO_REGCLASS - (XXEVAL - (COPY_TO_REGCLASS Vt:$vA, VSRC), - (COPY_TO_REGCLASS Vt:$vB, VSRC), - (COPY_TO_REGCLASS Vt:$vC, VSRC), - Imm), - VRRC)>; + // Auto-detect if type needs VRRC register class conversion +class IsVRRCType { + bit Res = !or(!eq(Vt, v8i16), !eq(Vt, v16i8)); +} + // Defines a helper class that automatically handles both VSRC and VRRC types +class XXEvalPattern Imm> { + dag Instr = !if(IsVRRCType.Res, + // VRRC path: wrap with COPY_TO_REGCLASS + (COPY_TO_REGCLASS + (XXEVAL (COPY_TO_REGCLASS Vt:$vA, VSRC), + (COPY_TO_REGCLASS Vt:$vB, VSRC), + (COPY_TO_REGCLASS Vt:$vC, VSRC), Imm), VRRC), + // VSRC path: direct XXEVAL + (XXEVAL $vA, $vB, $vC, Imm)); +} // ============================================================================= - // Helper Classes for Type-Aware Operation Generation - // ============================================================================= - // - // These helpers abstract the complexity of handling both XXEVAL-native types + // Helper Classes abstract the complexity of handling both XXEVAL-native types // (v4i32, v2i64) and non-native types (v8i16, v16i8) that require bitcasting. // // BITCASTING STRATEGY: @@ -2208,6 +2203,7 @@ class XXEvalVRRC Imm> : // bitcast → v4i32 → operation → bitcast back to original type // ============================================================================= + // ============================================================================= // Generates bitcast-aware unary NOT operations for any vector type. 
// Handles the type conversion complexity transparently. // @@ -2225,6 +2221,7 @@ class Not { (Vt (bitconvert (vnot (v4i32 (bitconvert Vt:$vC)))))); // Bitcast: other types } + // ============================================================================= // Generates bitcast-aware binary operations (and, or, xor) for any vector type. // Supports optional logical inversion of the result (for NOR, EQV operations). // @@ -2248,12 +2245,13 @@ class XXEvalBinOp { (Vt (bitconvert (vnot (Op (v4i32 (bitconvert Vt:$vB)), // Bitcast: other types (v4i32 (bitconvert Vt:$vC)))))))); } + // ============================================================================= // Pattern class for common binary bit operations class And : XXEvalBinOp; class Or : XXEvalBinOp; class Xor : XXEvalBinOp; -class Nor : XXEvalBinOp; // or + invert -class Eqv : XXEvalBinOp; // xor + invert +class Nor : XXEvalBinOp; // not(or) +class Eqv : XXEvalBinOp; // not(xor) // ============================================================================= // XXEVAL Ternary Pattern Multiclasses @@ -2266,60 +2264,32 @@ class Eqv : XXEvalBinOp; // xor + invert // encodings for various ternary equivalent operations. // ============================================================================= - // Generates XXEVAL patterns for types with native VSRC register class support. + // Generates XXEVAL patterns for both VSRC and VRRC types using XXEvalPattern class // Implements: vselect(A, X, and(B,C)) + // include and(B,C), xor(B,C), nor(B,C), eqv(B,C), not(C), not(B) // - // SUPPORTED TYPES: v4i32, v2i64 (VSRC register class) - // IMMEDIATE ENCODING: BaseImm + offset determines the ternary equivalent operation -multiclass XXEvalXAnd BaseImm> { - // vselect(A, xor(B,C), and(B,C)) : xxeval Imm Value is 22 - def : XXEvalPattern.BC, And.BC), BaseImm>; - + // SUPPORTED TYPES: v4i32, v2i64, v16i8, v8i16 + // IMMEDIATE ENCODING determines the ternary equivalent operation of xxeval instruction. 
+multiclass XXEvalXAnd { + /// vselect(A, xor(B,C), and(B,C)) : xxeval Imm Value is 22 + def : Pat<(Vt (vselect Vt:$vA, Xor.BC, And.BC)), + XXEvalPattern.Instr>; + // vselect(A, nor(B,C), and(B,C)) : xxeval Imm Value is 24 - def : XXEvalPattern.BC, And.BC), !add(BaseImm, 2)>; - + def : Pat<(Vt (vselect Vt:$vA, Nor.BC, And.BC)), + XXEvalPattern.Instr>; + // vselect(A, eqv(B,C), and(B,C)) : xxeval Imm Value is 25 - // EQV = NOT(XOR) = equivalence operation - def : XXEvalPattern.BC, And.BC), !add(BaseImm, 3)>; - + def : Pat<(Vt (vselect Vt:$vA, Eqv.BC, And.BC)), + XXEvalPattern.Instr>; + // vselect(A, not(C), and(B,C)) : xxeval Imm Value is 26 - def : XXEvalPattern.C, And.BC), !add(BaseImm, 4)>; - + def : Pat<(Vt (vselect Vt:$vA, Not.C, And.BC)), + XXEvalPattern.Instr>; + // vselect(A, not(B), and(B,C)) : xxeval Imm Value is 28 - def : XXEvalPattern.B, And.BC), !add(BaseImm, 6)>; -} - - // Generates XXEVAL patterns for types requiring VRRC register class conversion. - // Identical equivalent operations to XXEvalXAnd, but with register class handling. 
- // - // SUPPORTED TYPES: v8i16, v16i8 (VRRC register class) - // REGISTER CONVERSION: VRRC → VSRC → XXEVAL → VRRC - // IMMEDIATE ENCODING: Same as XXEvalXAnd (BaseImm + offset) -multiclass XXEvalXAndVRRC BaseImm> { - // vselect(A, xor(B,C), and(B,C)) : xxeval Imm Value is 22 - def : XXEvalVRRC.BC, And.BC), BaseImm>; - - // vselect(A, nor(B,C), and(B,C)) : xxeval Imm Value is 24 - def : XXEvalVRRC.BC, And.BC), !add(BaseImm, 2)>; - - // vselect(A, eqv(B,C), and(B,C)) : xxeval Imm Value is 25 - def : XXEvalVRRC.BC, And.BC), !add(BaseImm, 3)>; - - // vselect(A, not(C), and(B,C)) : xxeval Imm Value is 26 - def : XXEvalVRRC.C, And.BC), !add(BaseImm, 4)>; - - // vselect(A, not(B), and(B,C)) : xxeval Imm Value is 28 - def : XXEvalVRRC.B, And.BC), !add(BaseImm, 6)>; + def : Pat<(Vt (vselect Vt:$vA, Not.B, And.BC)), + XXEvalPattern.Instr>; } let Predicates = [PrefixInstrs, HasP10Vector] in { @@ -2352,93 +2322,87 @@ let Predicates = [PrefixInstrs, HasP10Vector] in { // Anonymous patterns for XXEVAL // AND // and(A, B, C) - def : XXEvalPattern; - // and(A, xor(B, C)) - def : XXEvalPattern; - // and(A, or(B, C)) - def : XXEvalPattern; - // and(A, nor(B, C)) - def : XXEvalPattern; - // and(A, eqv(B, C)) - def : XXEvalPattern; - // and(A, nand(B, C)) - def : XXEvalPattern; + def : Pat<(v4i32 (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; // NAND // nand(A, B, C) - def : XXEvalPattern; - // nand(A, xor(B, C)) - def : XXEvalPattern; - // nand(A, or(B, C)) - def : XXEvalPattern; - // nand(A, nor(B, C)) 
- def : XXEvalPattern; - // nand(A, eqv(B, C)) - def : XXEvalPattern; - // nand(A, nand(B, C)) - def : XXEvalPattern; + def : Pat<(v4i32 (vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; // EQV // (eqv A, B, C) - def : XXEvalPattern; - // (eqv A, (and B, C)) - def : XXEvalPattern; - // (eqv A, (or B, C)) - def : XXEvalPattern; + def : Pat<(v4i32 (or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), + (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; // NOR // (nor A, B, C) - def : XXEvalPattern; - // (nor A, (and B, C)) - def : XXEvalPattern; - // (nor A, (eqv B, C)) - def : XXEvalPattern; - // (nor A, (nand B, C)) - def : XXEvalPattern; - // (nor A, (nor B, C)) - def : XXEvalPattern; - // (nor A, (xor B, C)) - def : XXEvalPattern; + def : Pat<(v4i32 (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (vnot (or v4i32:$vA, (xor v4i32:$vB, 
v4i32:$vC)))), + XXEvalPattern.Instr>; // OR // (or A, B, C) - def : XXEvalPattern; - // (or A, (and B, C)) - def : XXEvalPattern; - // (or A, (eqv B, C)) - def : XXEvalPattern; - // (or A, (nand B, C)) - def : XXEvalPattern; - // (or A, (nor B, C)) - def : XXEvalPattern; - // (or A, (xor B, C)) - def : XXEvalPattern; + def : Pat<(v4i32 (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC)))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; // XOR // (xor A, B, C) - def : XXEvalPattern; - // (xor A, (and B, C)) - def : XXEvalPattern; - // (xor A, (or B, C)) - def : XXEvalPattern; + def : Pat<(v4i32 (xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; + def : Pat<(v4i32 (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), + XXEvalPattern.Instr>; // Add XXEval Patterns for ternary Operations. // For VSRC-native types (direct XXEVAL support) - foreach Ty = [v4i32, v2i64] in { - defm : XXEvalXAnd; - } - - // For VRRC types (requiring register class conversion) - foreach Ty = [v8i16, v16i8] in { - defm : XXEvalXAndVRRC; + foreach Ty = [v4i32, v2i64, v8i16, v16i8] in { + defm : XXEvalXAnd; } // Anonymous patterns to select prefixed VSX loads and stores.