Skip to content

Commit f20275c

Browse files
author
Tony Varghese
committed
[PowerPC10][XXEVAL] Exploit xxeval instruction for cases of the ternary(A,X, and(B,C)), ternary(A,X,B), ternary(A,X,C), ternary(A,X,xor(B,C)) forms.
1 parent e6529dc commit f20275c

File tree

5 files changed

+210
-128
lines changed

5 files changed

+210
-128
lines changed

llvm/lib/Target/PowerPC/PPCInstrP10.td

Lines changed: 168 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2159,8 +2159,131 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
21592159
(COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
21602160
}
21612161

2162-
class xxevalPattern <dag pattern, bits<8> imm> :
2163-
Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
2162+
// Selection-pattern helper for XXEVAL: matches `pattern` typed as `vt` and
// emits XXEVAL on $vA, $vB, $vC with the 8-bit immediate `imm`.
// The dag passed as `pattern` is expected to name its operands $vA/$vB/$vC,
// since those are the names the output dag refers to.
class xxevalPattern <ValueType vt, dag pattern, bits<8> imm> :
2163+
Pat<(vt pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
2164+
2165+
// Conditionally negates a dag.
//   d      - the dag to (optionally) wrap.
//   negate - when non-zero, `res` is (vnot d); otherwise `res` is `d` as-is.
class DagCondVNot<dag d, bit negate> {
2166+
// Utility to wrap the dag in a vnot when requested.
2167+
dag res = !if(!ne(negate, 0),
2168+
(vnot d),
2169+
d);
2170+
}
2171+
2172+
// Provides the "not B" (`vnotB`) and "not C" (`vnotC`) operand dags for value
// type `vt`. Only v4i32 and v2i64 have a !cond arm here.
// For v4i32 the operand is negated directly. For v2i64 the operand is
// bitconverted to v4i32, negated with vnot, and bitconverted back to v2i64.
// NOTE(review): presumably the v4i32 round-trip mirrors how the DAG represents
// a vector NOT of a v2i64 value -- confirm against the accompanying tests.
class XXEvalUnaryPattern<ValueType vt> {
2173+
// vnot of operand B ($vB), typed as `vt`.
2174+
dag vnotB = !cond(
2175+
!eq(vt, v4i32) : (vnot v4i32:$vB),
2176+
!eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vB)))))
2177+
);
2178+
// vnot of operand C ($vC), typed as `vt`.
2179+
dag vnotC = !cond(
2180+
!eq(vt, v4i32) : (vnot v4i32:$vC),
2181+
!eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vC)))))
2182+
);
2183+
}
2184+
2185+
// Provides `opPat`, the dag for `op($vB, $vC)` typed as `vt`, optionally
// negated.
//   vt        - result/operand type; only v4i32 and v2i64 have a !cond arm.
//   op        - the binary operator to apply (used below with and/or/xor).
//   notResult - when non-zero the result is wrapped in vnot via DagCondVNot,
//               so e.g. <vt, or, 1> yields nor(B, C). Defaults to 0.
class XXEvalBinaryPattern<ValueType vt, SDPatternOperator op, bit notResult = 0> {
2186+
// Defines a wrapper class for binary patterns with optional NOT on result.
2187+
// Generate op pattern with optional NOT wrapping for result depending on "notResult".
// For v2i64 both operands are bitconverted to v4i32, the (optionally negated)
// op is formed on v4i32, and the result is bitconverted back to v2i64.
2188+
dag opPat = !cond(
2189+
!eq(vt, v4i32) : DagCondVNot<(op v4i32:$vB, v4i32:$vC), notResult>.res,
2190+
!eq(vt, v2i64) : (v2i64 (bitconvert DagCondVNot<(op
2191+
(v4i32 (bitconvert v2i64:$vB)),
2192+
(v4i32 (bitconvert v2i64:$vC))), notResult>.res))
2193+
);
2194+
}
2195+
2196+
// Emits five anonymous XXEVAL patterns for (vselect A, X, and(B, C)), one per
// choice of X listed below.
//   vt      - vector type of the vselect and its operands.
//   baseImm - XXEVAL immediate of the first pattern (X = xor(B, C)); every
//             other pattern uses !add(baseImm, k) with the k in its comment.
// "Ternary(A, X, Y)" in the comments denotes (vselect A, X, Y).
// NOTE(review): the offsets follow from XXEVAL's truth-table immediate
// encoding; confirm against the Power ISA 3.1 description when editing them.
multiclass XXEvalVSelectWithXAnd<ValueType vt, bits<8> baseImm> {
2197+
// Multiclass for Ternary(A, X, and(B, C)) style patterns.
2198+
// Ternary(A, xor(B,C), and(B,C)) => imm: baseImm
2199+
def : xxevalPattern<vt,
2200+
(vselect vt:$vA, XXEvalBinaryPattern<vt, xor>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
2201+
baseImm>;
2202+
// Ternary(A, nor(B,C), and(B,C)) => imm: baseImm + 2
2203+
def : xxevalPattern<vt,
2204+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
2205+
!add(baseImm, 2)>;
2206+
// Ternary(A, eqv(B,C), and(B,C)) => imm: baseImm + 3
2207+
def : xxevalPattern<vt,
2208+
(vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
2209+
!add(baseImm, 3)>;
2210+
// Ternary(A, not(C), and(B,C)) => imm: baseImm + 4
2211+
def : xxevalPattern<vt,
2212+
(vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotC, XXEvalBinaryPattern<vt, and>.opPat),
2213+
!add(baseImm, 4)>;
2214+
// Ternary(A, not(B), and(B,C)) => imm: baseImm + 6
2215+
def : xxevalPattern<vt,
2216+
(vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotB, XXEvalBinaryPattern<vt, and>.opPat),
2217+
!add(baseImm, 6)>;
2218+
}
2219+
2220+
// Emits five anonymous XXEVAL patterns for (vselect A, X, B), one per choice
// of X listed below.
//   vt      - vector type of the vselect and its operands.
//   baseImm - XXEVAL immediate of the first pattern (X = and(B, C)); every
//             other pattern uses !add(baseImm, k) with the k in its comment.
// "Ternary(A, X, Y)" in the comments denotes (vselect A, X, Y).
multiclass XXEvalVSelectWithXB<ValueType vt, bits<8> baseImm>{
2221+
// Multiclass for Ternary(A, X, B) style patterns
2222+
// Ternary(A, and(B,C), B) => imm: baseImm
2223+
def : xxevalPattern<vt,
2224+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vB),
2225+
baseImm>;
2226+
// Ternary(A, nor(B,C), B) => imm: baseImm + 7
2227+
def : xxevalPattern<vt,
2228+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vB),
2229+
!add(baseImm, 7)>;
2230+
// Ternary(A, eqv(B,C), B) => imm: baseImm + 8
2231+
def : xxevalPattern<vt,
2232+
(vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, vt:$vB),
2233+
!add(baseImm, 8)>;
2234+
// Ternary(A, not(C), B) => imm: baseImm + 9
2235+
def : xxevalPattern<vt,
2236+
(vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotC, vt:$vB),
2237+
!add(baseImm, 9)>;
2238+
// Ternary(A, nand(B,C), B) => imm: baseImm + 13
2239+
def : xxevalPattern<vt,
2240+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and, 1>.opPat, vt:$vB),
2241+
!add(baseImm, 13)>;
2242+
}
2243+
2244+
// Emits four anonymous XXEVAL patterns for (vselect A, X, C), one per choice
// of X listed below.
//   vt      - vector type of the vselect and its operands.
//   baseImm - XXEVAL immediate of the first pattern (X = and(B, C)); every
//             other pattern uses !add(baseImm, k) with the k in its comment.
// "Ternary(A, X, Y)" in the comments denotes (vselect A, X, Y).
multiclass XXEvalVSelectWithXC<ValueType vt, bits<8> baseImm>{
2245+
// Multiclass for Ternary(A, X, C) style patterns
2246+
// Ternary(A, and(B,C), C) => imm: baseImm
2247+
def : xxevalPattern<vt,
2248+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vC),
2249+
baseImm>;
2250+
// Ternary(A, nor(B,C), C) => imm: baseImm + 7
2251+
def : xxevalPattern<vt,
2252+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vC),
2253+
!add(baseImm, 7)>;
2254+
// Ternary(A, eqv(B,C), C) => imm: baseImm + 8
2255+
def : xxevalPattern<vt,
2256+
(vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, vt:$vC),
2257+
!add(baseImm, 8)>;
2258+
// Ternary(A, nand(B,C), C) => imm: baseImm + 13
2259+
def : xxevalPattern<vt,
2260+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and, 1>.opPat, vt:$vC),
2261+
!add(baseImm, 13)>;
2262+
}
2263+
2264+
// Emits five anonymous XXEVAL patterns for (vselect A, X, xor(B, C)), one per
// choice of X listed below.
//   vt      - vector type of the vselect and its operands.
//   baseImm - XXEVAL immediate of the first pattern (X = and(B, C)); every
//             other pattern uses !add(baseImm, k) with the k in its comment.
// "Ternary(A, X, Y)" in the comments denotes (vselect A, X, Y).
multiclass XXEvalVSelectWithXXor<ValueType vt, bits<8> baseImm>{
2265+
// Multiclass for Ternary(A, X, xor(B,C)) style patterns
2266+
// Ternary(A, and(B,C), xor(B,C)) => imm: baseImm
2267+
def : xxevalPattern<vt,
2268+
(vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
2269+
baseImm>;
2270+
// Ternary(A, B, xor(B,C)) => imm: baseImm + 2
2271+
def : xxevalPattern<vt,
2272+
(vselect vt:$vA, vt:$vB, XXEvalBinaryPattern<vt, xor>.opPat),
2273+
!add(baseImm, 2)>;
2274+
// Ternary(A, C, xor(B,C)) => imm: baseImm + 4
2275+
def : xxevalPattern<vt,
2276+
(vselect vt:$vA, vt:$vC, XXEvalBinaryPattern<vt, xor>.opPat),
2277+
!add(baseImm, 4)>;
2278+
// Ternary(A, or(B,C), xor(B,C)) => imm: baseImm + 6
2279+
def : xxevalPattern<vt,
2280+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
2281+
!add(baseImm, 6)>;
2282+
// Ternary(A, nor(B,C), xor(B,C)) => imm: baseImm + 7
2283+
def : xxevalPattern<vt,
2284+
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
2285+
!add(baseImm, 7)>;
2286+
}
21642287

21652288
let Predicates = [PrefixInstrs, HasP10Vector] in {
21662289
let AddedComplexity = 400 in {
@@ -2192,83 +2315,96 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
21922315
// Anonymous patterns for XXEVAL
21932316
// AND
21942317
// and(A, B, C)
2195-
def : xxevalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
2318+
def : xxevalPattern<v4i32, (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
21962319
// and(A, xor(B, C))
2197-
def : xxevalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
2320+
def : xxevalPattern<v4i32, (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
21982321
// and(A, or(B, C))
2199-
def : xxevalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
2322+
def : xxevalPattern<v4i32, (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
22002323
// and(A, nor(B, C))
2201-
def : xxevalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>;
2324+
def : xxevalPattern<v4i32, (and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>;
22022325
// and(A, eqv(B, C))
2203-
def : xxevalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>;
2326+
def : xxevalPattern<v4i32, (and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>;
22042327
// and(A, nand(B, C))
2205-
def : xxevalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>;
2328+
def : xxevalPattern<v4i32, (and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>;
22062329

22072330
// NAND
// Each immediate is written as !sub(255, k), where k is the immediate of the
// corresponding AND pattern above (1, 6, 7, 8, 9, 14): complementing all eight
// immediate bits negates the result.
// The last three dags are spelled in De Morgan form, or(not(A), not(X)), with
// the inner negation already folded (e.g. not(nor(B, C)) == or(B, C)).
22082331
// nand(A, B, C)
2209-
def : xxevalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
2332+
def : xxevalPattern<v4i32, (vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
22102333
!sub(255, 1)>;
22112334
// nand(A, xor(B, C))
2212-
def : xxevalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
2335+
def : xxevalPattern<v4i32, (vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
22132336
!sub(255, 6)>;
22142337
// nand(A, or(B, C))
2215-
def : xxevalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
2338+
def : xxevalPattern<v4i32, (vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
22162339
!sub(255, 7)>;
22172340
// nand(A, nor(B, C)) == or(not(A), or(B, C))
2218-
def : xxevalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
2341+
def : xxevalPattern<v4i32, (or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
22192342
!sub(255, 8)>;
22202343
// nand(A, eqv(B, C)) == or(not(A), xor(B, C))
2221-
def : xxevalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
2344+
def : xxevalPattern<v4i32, (or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
22222345
!sub(255, 9)>;
22232346
// nand(A, nand(B, C)) == or(not(A), and(B, C))
2224-
def : xxevalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
2347+
def : xxevalPattern<v4i32, (or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
22252348
!sub(255, 14)>;
22262349

22272350
// EQV
22282351
// (eqv A, B, C)
// The dag below is or(and(A,B,C), nor(A,B,C)), i.e. "all three inputs equal":
// it is true only for A=B=C=0 and A=B=C=1. With the XXEVAL truth-table
// encoding used by the other patterns here (and(A,B,C) = 1, nor(A,B,C) = 128),
// that is 128 + 1 = 129. The previous immediate, 150 (0b10010110), encodes
// not(xor(A, xor(B, C))), which disagrees with this dag (e.g. at A=B=C=1) and
// would miscompile any input that matches it.
2229-
def : xxevalPattern<(or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)),
2352+
def : xxevalPattern<v4i32, (or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)),
22302353
(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))),
22312354
129>;
22322355
// (eqv A, (and B, C))
2233-
def : xxevalPattern<(vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>;
2356+
def : xxevalPattern<v4i32, (vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>;
22342357
// (eqv A, (or B, C))
2235-
def : xxevalPattern<(vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>;
2358+
def : xxevalPattern<v4i32, (vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>;
22362359

22372360
// NOR
22382361
// (nor A, B, C)
2239-
def : xxevalPattern<(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>;
2362+
def : xxevalPattern<v4i32, (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>;
22402363
// (nor A, (and B, C))
2241-
def : xxevalPattern<(vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>;
2364+
def : xxevalPattern<v4i32, (vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>;
22422365
// (nor A, (eqv B, C))
2243-
def : xxevalPattern<(and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>;
2366+
def : xxevalPattern<v4i32, (and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>;
22442367
// (nor A, (nand B, C))
2245-
def : xxevalPattern<(and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>;
2368+
def : xxevalPattern<v4i32, (and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>;
22462369
// (nor A, (nor B, C))
2247-
def : xxevalPattern<(and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>;
2370+
def : xxevalPattern<v4i32, (and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>;
22482371
// (nor A, (xor B, C))
2249-
def : xxevalPattern<(vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>;
2372+
def : xxevalPattern<v4i32, (vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>;
22502373

22512374
// OR
22522375
// (or A, B, C)
2253-
def : xxevalPattern<(or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>;
2376+
def : xxevalPattern<v4i32, (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>;
22542377
// (or A, (and B, C))
2255-
def : xxevalPattern<(or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>;
2378+
def : xxevalPattern<v4i32, (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>;
22562379
// (or A, (eqv B, C))
2257-
def : xxevalPattern<(or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>;
2380+
def : xxevalPattern<v4i32, (or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>;
22582381
// (or A, (nand B, C))
2259-
def : xxevalPattern<(or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>;
2382+
def : xxevalPattern<v4i32, (or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>;
22602383
// (or A, (nor B, C))
2261-
def : xxevalPattern<(or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>;
2384+
def : xxevalPattern<v4i32, (or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>;
22622385
// (or A, (xor B, C))
2263-
def : xxevalPattern<(or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>;
2386+
def : xxevalPattern<v4i32, (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>;
22642387

22652388
// XOR
22662389
// (xor A, B, C)
2267-
def : xxevalPattern<(xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>;
2390+
def : xxevalPattern<v4i32, (xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>;
22682391
// (xor A, (and B, C))
2269-
def : xxevalPattern<(xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>;
2392+
def : xxevalPattern<v4i32, (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>;
22702393
// (xor A, (or B, C))
2271-
def : xxevalPattern<(xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>;
2394+
def : xxevalPattern<v4i32, (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>;
2395+
2396+
// Utilize xxeval instruction for ternary vector expressions.
// Each multiclass is instantiated for both v4i32 and v2i64 with the same base
// immediate. The literal is the immediate of that multiclass's first pattern;
// the remaining patterns are fixed offsets from it.
2397+
// Ternary(A, X, and(B,C)): 22 is Ternary(A, xor(B,C), and(B,C)).
defm : XXEvalVSelectWithXAnd<v4i32, 22>;
2398+
defm : XXEvalVSelectWithXAnd<v2i64, 22>;
2399+
2400+
// Ternary(A, X, B): 49 is Ternary(A, and(B,C), B).
defm : XXEvalVSelectWithXB<v4i32, 49>;
2401+
defm : XXEvalVSelectWithXB<v2i64, 49>;
2402+
2403+
// Ternary(A, X, C): 81 is Ternary(A, and(B,C), C).
defm : XXEvalVSelectWithXC<v4i32, 81>;
2404+
defm : XXEvalVSelectWithXC<v2i64, 81>;
2405+
2406+
// Ternary(A, X, xor(B,C)): 97 is Ternary(A, and(B,C), xor(B,C)).
defm : XXEvalVSelectWithXXor<v4i32, 97>;
2407+
defm : XXEvalVSelectWithXXor<v2i64, 97>;
22722408

22732409
// Anonymous patterns to select prefixed VSX loads and stores.
22742410
// Load / Store f128

0 commit comments

Comments
 (0)