Skip to content

Commit fcebb02

Browse files
author
Tony Varghese
committed
[PowerPC10][XXEVAL] Exploit xxeval instruction for cases of the ternary(A,X, and(B,C)), ternary(A,X,B), ternary(A,X,C), ternary(A,X,xor(B,C)) forms.
1 parent e6529dc commit fcebb02

File tree

5 files changed

+663
-222
lines changed

5 files changed

+663
-222
lines changed

llvm/lib/Target/PowerPC/PPCInstrP10.td

Lines changed: 170 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2159,8 +2159,133 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
21592159
(COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
21602160
}
21612161

2162-
class xxevalPattern <dag pattern, bits<8> imm> :
2163-
Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
2162+
// Selection pattern that maps a dag of value type `vt` onto a single XXEVAL
// instruction with operands $vA, $vB, $vC and the 8-bit immediate `imm`.
// `pattern` is expected to bind its operands to the names $vA, $vB and $vC,
// since those are the names referenced by the output dag.
class XXEvalPattern <ValueType vt, dag pattern, bits<8> imm> :
2163+
Pat<(vt pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
2164+
2165+
// Helper that builds a bitwise-NOT dag of a single named operand.
//   vt    - value type of the operand; only v4i32 and v2i64 have a !cond case.
//   opstr - name the operand is bound to in the generated dag (e.g. "vB").
// The generated dag is exposed through the `res` field.
class DagUnaryVNot<ValueType vt, string opstr>{
2166+
// v4i32: vnot applied directly to the v4i32 operand named `opstr`.
// v2i64: the operand is bitconverted to v4i32, negated with vnot, and the
// result is bitconverted back to v2i64.
2167+
dag res = !cond(
2168+
!eq(vt, v4i32) : !dag(vnot, [v4i32], [opstr]),
2169+
!eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 !dag(bitconvert, [v2i64], [opstr])))))
2170+
);
2171+
}
2172+
2173+
// Helper that optionally negates a dag.
//   d      - the dag to wrap.
//   negate - when non-zero, `res` is (vnot d); otherwise `res` is `d` unchanged.
class DagCondVNot<dag d, bit negate> {
2174+
// Wrap `d` in vnot only when `negate` is set.
2175+
dag res = !if(!ne(negate, 0),
2176+
(vnot d),
2177+
d);
2178+
}
2179+
2180+
// Convenience wrapper exposing the NOT of operand $vB and of operand $vC for
// value type `vt`, both built with DagUnaryVNot.
class XXEvalUnaryNot<ValueType vt> {
2181+
// opB is not($vB) and opC is not($vC); see DagUnaryVNot for how each value
2182+
// type (v4i32 or v2i64) is handled. No equivalent for $vA is defined here.
2183+
dag opB = DagUnaryVNot<vt, "vB">.res;
2184+
dag opC = DagUnaryVNot<vt, "vC">.res;
2185+
}
2186+
2187+
// Helper that builds the dag for a binary operation on $vB and $vC.
//   vt        - value type of the operands; only v4i32 and v2i64 have a case.
//   op        - the binary operator to apply (and, or, xor in this file).
//   notResult - when non-zero the result of `op` is wrapped in vnot, which is
//               how nand/nor/eqv are expressed. Defaults to 0.
// The generated dag is exposed through the `opPat` field.
class XXEvalBinaryPattern<ValueType vt, SDPatternOperator op, bit notResult = 0> {
2188+
// v4i32: `op` is applied directly to v4i32:$vB and v4i32:$vC.
2189+
// v2i64: both operands are bitconverted to v4i32, `op` (and the optional
// vnot) is applied on v4i32, and the result is bitconverted back to v2i64.
2190+
dag opPat = !cond(
2191+
!eq(vt, v4i32) : DagCondVNot<(op v4i32:$vB, v4i32:$vC), notResult>.res,
2192+
!eq(vt, v2i64) : (v2i64 (bitconvert DagCondVNot<(op
2193+
(v4i32 (bitconvert v2i64:$vB)),
2194+
(v4i32 (bitconvert v2i64:$vC))), notResult>.res))
2195+
);
2196+
}
2197+
2198+
// Emits XXEVAL selection patterns for (vselect $vA, X, and($vB, $vC)) on value
// type `vt`, written Ternary(A, X, and(B, C)) in the comments below.
// `baseImm` is the XXEVAL immediate of the first pattern; every other pattern
// uses a fixed offset from it.
multiclass XXEvalVSelectWithXAnd<ValueType vt, bits<8> baseImm> {
2199+
// X is one of: xor(B,C), nor(B,C), eqv(B,C), not(C), not(B).
2200+
// Ternary(A, xor(B,C), and(B,C)) => imm: baseImm
2201+
def : XXEvalPattern<vt,
2202+
(vselect vt:$vA, XXEvalBinaryPattern<vt, xor>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
2203+
baseImm>;
2204+
// Ternary(A, nor(B,C), and(B,C)) => imm: baseImm + 2
2205+
def : XXEvalPattern<vt,
2206+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
2207+
!add(baseImm, 2)>;
2208+
// Ternary(A, eqv(B,C), and(B,C)) => imm: baseImm + 3
2209+
def : XXEvalPattern<vt,
2210+
(vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
2211+
!add(baseImm, 3)>;
2212+
// Ternary(A, not(C), and(B,C)) => imm: baseImm + 4
2213+
def : XXEvalPattern<vt,
2214+
(vselect vt:$vA, XXEvalUnaryNot<vt>.opC, XXEvalBinaryPattern<vt, and>.opPat),
2215+
!add(baseImm, 4)>;
2216+
// Ternary(A, not(B), and(B,C)) => imm: baseImm + 6
2217+
def : XXEvalPattern<vt,
2218+
(vselect vt:$vA, XXEvalUnaryNot<vt>.opB, XXEvalBinaryPattern<vt, and>.opPat),
2219+
!add(baseImm, 6)>;
2220+
}
2221+
2222+
// Emits XXEVAL selection patterns for (vselect $vA, X, $vB) on value type
// `vt`, written Ternary(A, X, B) in the comments below.
// `baseImm` is the XXEVAL immediate of the first pattern; every other pattern
// uses a fixed offset from it.
multiclass XXEvalVSelectWithXB<ValueType vt, bits<8> baseImm>{
2223+
// X is one of: and(B,C), nor(B,C), eqv(B,C), not(C), nand(B,C).
2224+
// Ternary(A, and(B,C), B) => imm: baseImm
2225+
def : XXEvalPattern<vt,
2226+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vB),
2227+
baseImm>;
2228+
// Ternary(A, nor(B,C), B) => imm: baseImm + 7
2229+
def : XXEvalPattern<vt,
2230+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vB),
2231+
!add(baseImm, 7)>;
2232+
// Ternary(A, eqv(B,C), B) => imm: baseImm + 8
2233+
def : XXEvalPattern<vt,
2234+
(vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, vt:$vB),
2235+
!add(baseImm, 8)>;
2236+
// Ternary(A, not(C), B) => imm: baseImm + 9
2237+
def : XXEvalPattern<vt,
2238+
(vselect vt:$vA, XXEvalUnaryNot<vt>.opC, vt:$vB),
2239+
!add(baseImm, 9)>;
2240+
// Ternary(A, nand(B,C), B) => imm: baseImm + 13
2241+
def : XXEvalPattern<vt,
2242+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and, 1>.opPat, vt:$vB),
2243+
!add(baseImm, 13)>;
2244+
}
2245+
2246+
// Emits XXEVAL selection patterns for (vselect $vA, X, $vC) on value type
// `vt`, written Ternary(A, X, C) in the comments below.
// `baseImm` is the XXEVAL immediate of the first pattern; every other pattern
// uses a fixed offset from it.
multiclass XXEvalVSelectWithXC<ValueType vt, bits<8> baseImm>{
2247+
// X is one of: and(B,C), nor(B,C), eqv(B,C), nand(B,C).
2248+
// Ternary(A, and(B,C), C) => imm: baseImm
2249+
def : XXEvalPattern<vt,
2250+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vC),
2251+
baseImm>;
2252+
// Ternary(A, nor(B,C), C) => imm: baseImm + 7
2253+
def : XXEvalPattern<vt,
2254+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vC),
2255+
!add(baseImm, 7)>;
2256+
// Ternary(A, eqv(B,C), C) => imm: baseImm + 8
2257+
def : XXEvalPattern<vt,
2258+
(vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, vt:$vC),
2259+
!add(baseImm, 8)>;
2260+
// Ternary(A, nand(B,C), C) => imm: baseImm + 13
2261+
def : XXEvalPattern<vt,
2262+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and, 1>.opPat, vt:$vC),
2263+
!add(baseImm, 13)>;
2264+
}
2265+
2266+
// Emits XXEVAL selection patterns for (vselect $vA, X, xor($vB, $vC)) on value
// type `vt`, written Ternary(A, X, xor(B,C)) in the comments below.
// `baseImm` is the XXEVAL immediate of the first pattern; every other pattern
// uses a fixed offset from it.
multiclass XXEvalVSelectWithXXor<ValueType vt, bits<8> baseImm>{
2267+
// X is one of: and(B,C), B, C, or(B,C), nor(B,C).
2268+
// Ternary(A, and(B,C), xor(B,C)) => imm: baseImm
2269+
def : XXEvalPattern<vt,
2270+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
2271+
baseImm>;
2272+
// Ternary(A, B, xor(B,C)) => imm: baseImm + 2
2273+
def : XXEvalPattern<vt,
2274+
(vselect vt:$vA, vt:$vB, XXEvalBinaryPattern<vt, xor>.opPat),
2275+
!add(baseImm, 2)>;
2276+
// Ternary(A, C, xor(B,C)) => imm: baseImm + 4
2277+
def : XXEvalPattern<vt,
2278+
(vselect vt:$vA, vt:$vC, XXEvalBinaryPattern<vt, xor>.opPat),
2279+
!add(baseImm, 4)>;
2280+
// Ternary(A, or(B,C), xor(B,C)) => imm: baseImm + 6
2281+
def : XXEvalPattern<vt,
2282+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
2283+
!add(baseImm, 6)>;
2284+
// Ternary(A, nor(B,C), xor(B,C)) => imm: baseImm + 7
2285+
def : XXEvalPattern<vt,
2286+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
2287+
!add(baseImm, 7)>;
2288+
}
21642289

21652290
let Predicates = [PrefixInstrs, HasP10Vector] in {
21662291
let AddedComplexity = 400 in {
@@ -2192,83 +2317,96 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
21922317
// Anonymous patterns for XXEVAL
21932318
// AND
21942319
// and(A, B, C)
2195-
def : xxevalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
2320+
def : XXEvalPattern<v4i32, (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
21962321
// and(A, xor(B, C))
2197-
def : xxevalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
2322+
def : XXEvalPattern<v4i32, (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
21982323
// and(A, or(B, C))
2199-
def : xxevalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
2324+
def : XXEvalPattern<v4i32, (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
22002325
// and(A, nor(B, C))
2201-
def : xxevalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>;
2326+
def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>;
22022327
// and(A, eqv(B, C))
2203-
def : xxevalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>;
2328+
def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>;
22042329
// and(A, nand(B, C))
2205-
def : xxevalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>;
2330+
def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>;
22062331

22072332
// NAND
22082333
// nand(A, B, C)
2209-
def : xxevalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
2334+
def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
22102335
!sub(255, 1)>;
22112336
// nand(A, xor(B, C))
2212-
def : xxevalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
2337+
def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
22132338
!sub(255, 6)>;
22142339
// nand(A, or(B, C))
2215-
def : xxevalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
2340+
def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
22162341
!sub(255, 7)>;
22172342
// nand(A, nor(B, C))
2218-
def : xxevalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
2343+
def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
22192344
!sub(255, 8)>;
22202345
// nand(A, eqv(B, C))
2221-
def : xxevalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
2346+
def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
22222347
!sub(255, 9)>;
22232348
// nand(A, nand(B, C))
2224-
def : xxevalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
2349+
def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
22252350
!sub(255, 14)>;
22262351

22272352
// EQV
22282353
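// (eqv A, B, C)
// NOTE(review): as written, the pattern below is true only when A, B and C are
// all 1 or all 0. Indexed by A:B:C with the MSB first (the ordering the other
// immediates in this list follow), that truth table is 0b10000001 = 129.
// The immediate used, 150 = 0b10010110, is the complement of xor(A, B, C)
// (105). Confirm whether the pattern or the immediate is the intended one.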
2229-
def : xxevalPattern<(or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)),
2354+
def : XXEvalPattern<v4i32, (or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)),
22302355
(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))),
22312356
150>;
22322357
// (eqv A, (and B, C))
2233-
def : xxevalPattern<(vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>;
2358+
def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>;
22342359
// (eqv A, (or B, C))
2235-
def : xxevalPattern<(vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>;
2360+
def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>;
22362361

22372362
// NOR
22382363
// (nor A, B, C)
2239-
def : xxevalPattern<(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>;
2364+
def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>;
22402365
// (nor A, (and B, C))
2241-
def : xxevalPattern<(vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>;
2366+
def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>;
22422367
// (nor A, (eqv B, C))
2243-
def : xxevalPattern<(and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>;
2368+
def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>;
22442369
// (nor A, (nand B, C))
2245-
def : xxevalPattern<(and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>;
2370+
def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>;
22462371
// (nor A, (nor B, C))
2247-
def : xxevalPattern<(and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>;
2372+
def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>;
22482373
// (nor A, (xor B, C))
2249-
def : xxevalPattern<(vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>;
2374+
def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>;
22502375

22512376
// OR
22522377
// (or A, B, C)
2253-
def : xxevalPattern<(or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>;
2378+
def : XXEvalPattern<v4i32, (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>;
22542379
// (or A, (and B, C))
2255-
def : xxevalPattern<(or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>;
2380+
def : XXEvalPattern<v4i32, (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>;
22562381
// (or A, (eqv B, C))
2257-
def : xxevalPattern<(or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>;
2382+
def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>;
22582383
// (or A, (nand B, C))
2259-
def : xxevalPattern<(or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>;
2384+
def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>;
22602385
// (or A, (nor B, C))
2261-
def : xxevalPattern<(or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>;
2386+
def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>;
22622387
// (or A, (xor B, C))
2263-
def : xxevalPattern<(or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>;
2388+
def : XXEvalPattern<v4i32, (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>;
22642389

22652390
// XOR
22662391
// (xor A, B, C)
2267-
def : xxevalPattern<(xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>;
2392+
def : XXEvalPattern<v4i32, (xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>;
22682393
// (xor A, (and B, C))
2269-
def : xxevalPattern<(xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>;
2394+
def : XXEvalPattern<v4i32, (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>;
22702395
// (xor A, (or B, C))
2271-
def : xxevalPattern<(xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>;
2396+
def : XXEvalPattern<v4i32, (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>;
2397+
2398+
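// Ternary (vselect) operation support with the xxeval instruction.
// Each multiclass is instantiated for both v4i32 and v2i64 with the same base
// immediate.
// NOTE(review): the base immediates appear to be
// (truth table of the false operand over B:C) << 4 | (truth table of the true
// operand), e.g. 22 = 0x16 is and(B,C) = 0b0001 followed by xor(B,C) = 0b0110.
// Confirm against the ISA definition of xxeval.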
2399+
defm : XXEvalVSelectWithXAnd<v4i32, 22>;
2400+
defm : XXEvalVSelectWithXAnd<v2i64, 22>;
2401+
2402+
defm : XXEvalVSelectWithXB<v4i32, 49>;
2403+
defm : XXEvalVSelectWithXB<v2i64, 49>;
2404+
2405+
defm : XXEvalVSelectWithXC<v4i32, 81>;
2406+
defm : XXEvalVSelectWithXC<v2i64, 81>;
2407+
2408+
defm : XXEvalVSelectWithXXor<v4i32, 97>;
2409+
defm : XXEvalVSelectWithXXor<v2i64, 97>;
22722410

22732411
// Anonymous patterns to select prefixed VSX loads and stores.
22742412
// Load / Store f128

0 commit comments

Comments
 (0)