Skip to content

Commit 80ce067

Browse files
committed
Expanding the Histogram Intrinsic
Expanding the Histogram intrinsic to support more update operations: uadd.sat, umax, and umin.
1 parent b55f751 commit 80ce067

File tree

7 files changed

+327
-24
lines changed

7 files changed

+327
-24
lines changed

llvm/docs/LangRef.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20295,6 +20295,9 @@ More update operation types may be added in the future.
2029520295

2029620296
declare void @llvm.experimental.vector.histogram.add.v8p0.i32(<8 x ptr> %ptrs, i32 %inc, <8 x i1> %mask)
2029720297
declare void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %ptrs, i64 %inc, <vscale x 2 x i1> %mask)
20298+
declare void @llvm.experimental.vector.histogram.uadd.sat.v8p0.i32(<8 x ptr> %ptrs, i32 %inc, <8 x i1> %mask)
20299+
declare void @llvm.experimental.vector.histogram.umax.v8p0.i32(<8 x ptr> %ptrs, i32 %val, <8 x i1> %mask)
20300+
declare void @llvm.experimental.vector.histogram.umin.v8p0.i32(<8 x ptr> %ptrs, i32 %val, <8 x i1> %mask)
2029820301

2029920302
Arguments:
2030020303
""""""""""

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1947,6 +1947,24 @@ def int_experimental_vector_histogram_add : DefaultAttrsIntrinsic<[],
19471947
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
19481948
[ IntrArgMemOnly ]>;
19491949

1950+
// Histogram counterpart of the uadd.sat update. Takes a vector of pointers,
// an integer increment and a mask, and produces no result value.
def int_experimental_vector_histogram_uadd_sat : DefaultAttrsIntrinsic<[],
1951+
[ llvm_anyvector_ty, // Vector of pointers
1952+
llvm_anyint_ty, // Increment
1953+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
1954+
[ IntrArgMemOnly ]>;
1955+
1956+
// Histogram counterpart of the umin update. Takes a vector of pointers, an
// integer update value and a mask, and produces no result value.
def int_experimental_vector_histogram_umin : DefaultAttrsIntrinsic<[],
1957+
[ llvm_anyvector_ty, // Vector of pointers
1958+
llvm_anyint_ty, // Update value
1959+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
1960+
[ IntrArgMemOnly ]>;
1961+
1962+
// Histogram counterpart of the umax update. Takes a vector of pointers, an
// integer update value and a mask, and produces no result value.
def int_experimental_vector_histogram_umax : DefaultAttrsIntrinsic<[],
1963+
[ llvm_anyvector_ty, // Vector of pointers
1964+
llvm_anyint_ty, // Update value
1965+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
1966+
[ IntrArgMemOnly ]>;
1967+
19501968
// Experimental match
19511969
def int_experimental_vector_match : DefaultAttrsIntrinsic<
19521970
[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,34 +1079,41 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
10791079

10801080
/// Find histogram operations that match high-level code in loops:
10811081
/// \code
1082-
/// buckets[indices[i]]+=step;
1082+
/// buckets[indices[i]] = UpdateOperator(buckets[indices[i]], Val);
10831083
/// \endcode
1084+
/// where UpdateOperator can be add, sub, uadd.sat, umin, or umax.
10841085
///
10851086
/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
1086-
/// array the computed histogram. It uses a BinOp to sum all counts, storing
1087-
/// them using a loop-variant index Load from the 'indices' input array.
1087+
/// array the computed histogram. It uses an update instruction to update all
1088+
/// counts, storing them using a loop-variant index Load from the 'indices'
1089+
/// input array.
10881090
///
10891091
/// On successful matches it updates the STATISTIC 'HistogramsDetected',
10901092
/// regardless of hardware support. When there is support, it additionally
1091-
/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
1093+
/// stores the UpdateOp/Load pairs in \p HistogramCounts, as well as the pointers
10921094
/// used to update histogram in \p HistogramPtrs.
10931095
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
10941096
const PredicatedScalarEvolution &PSE,
10951097
SmallVectorImpl<HistogramInfo> &Histograms) {
10961098

1097-
// Store value must come from a Binary Operation.
10981099
Instruction *HPtrInstr = nullptr;
1099-
BinaryOperator *HBinOp = nullptr;
1100-
if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
1100+
Instruction *HInstr = nullptr;
1101+
if (!match(HSt, m_Store(m_Instruction(HInstr), m_Instruction(HPtrInstr))))
11011102
return false;
11021103

11031104
// The update must be an Add, a Sub, or a call to uadd.sat/umax/umin
11041105
// modifying the bucket value by a loop invariant amount.
11051106
// FIXME: We assume the loop invariant term is on the RHS.
11061107
// Fine for an immediate/constant, but maybe not a generic value?
11071108
Value *HIncVal = nullptr;
1108-
if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1109-
!match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
1109+
if (!match(HInstr, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1110+
!match(HInstr, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1111+
!match(HInstr, m_Intrinsic<Intrinsic::uadd_sat>(
1112+
m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1113+
!match(HInstr, m_Intrinsic<Intrinsic::umax>(m_Load(m_Specific(HPtrInstr)),
1114+
m_Value(HIncVal))) &&
1115+
!match(HInstr, m_Intrinsic<Intrinsic::umin>(m_Load(m_Specific(HPtrInstr)),
1116+
m_Value(HIncVal))))
11101117
return false;
11111118

11121119
// Make sure the increment value is loop invariant.
@@ -1148,15 +1155,15 @@ static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
11481155

11491156
// Ensure we'll have the same mask by checking that all parts of the histogram
11501157
// (gather load, update, scatter store) are in the same block.
1151-
LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
1158+
LoadInst *IndexedLoad = cast<LoadInst>(HInstr->getOperand(0));
11521159
BasicBlock *LdBB = IndexedLoad->getParent();
1153-
if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
1160+
if (LdBB != HInstr->getParent() || LdBB != HSt->getParent())
11541161
return false;
11551162

11561163
LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
11571164

11581165
// Store the operations that make up the histogram.
1159-
Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1166+
Histograms.emplace_back(IndexedLoad, HInstr, HSt);
11601167
return true;
11611168
}
11621169

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8634,14 +8634,16 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
86348634
ArrayRef<VPValue *> Operands) {
86358635
// FIXME: Support other operations.
86368636
unsigned Opcode = HI->Update->getOpcode();
8637-
assert((Opcode == Instruction::Add || Opcode == Instruction::Sub) &&
8638-
"Histogram update operation must be an Add or Sub");
8637+
assert(VPHistogramRecipe::isLegalUpdateInstruction(HI->Update) &&
8638+
"Found Ilegal update instruction for histogram");
86398639

86408640
SmallVector<VPValue *, 3> HGramOps;
86418641
// Bucket address.
86428642
HGramOps.push_back(Operands[1]);
86438643
// Increment value.
86448644
HGramOps.push_back(getVPValueOrAddLiveIn(HI->Update->getOperand(1)));
8645+
// Update Instruction.
8646+
HGramOps.push_back(getVPValueOrAddLiveIn(HI->Update));
86458647

86468648
// In case of predicated execution (due to tail-folding, or conditional
86478649
// execution, or both), pass the relevant mask.

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1466,9 +1466,16 @@ class VPHistogramRecipe : public VPRecipeBase {
14661466
/// Return the mask operand if one was provided, or a null pointer if all
14671467
/// lanes should be executed unconditionally.
14681468
VPValue *getMask() const {
1469-
return getNumOperands() == 3 ? getOperand(2) : nullptr;
1469+
return getNumOperands() == 4 ? getOperand(3) : nullptr;
14701470
}
14711471

1472+
/// Returns true if \p I is a legal update instruction of a histogram operation.
1473+
static bool isLegalUpdateInstruction(Instruction *I);
1474+
1475+
/// Given update instruction \p I, returns the ID of the corresponding
1476+
/// histogram intrinsic.
1477+
static unsigned getHistogramOpcode(Instruction *I);
1478+
14721479
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
14731480
/// Print the recipe
14741481
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,7 @@ void VPHistogramRecipe::execute(VPTransformState &State) {
12231223

12241224
Value *Address = State.get(getOperand(0));
12251225
Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1226+
Instruction *UpdateInst = cast<Instruction>(State.get(getOperand(2)));
12261227
VectorType *VTy = cast<VectorType>(Address->getType());
12271228

12281229
// The histogram intrinsic requires a mask even if the recipe doesn't;
@@ -1239,10 +1240,10 @@ void VPHistogramRecipe::execute(VPTransformState &State) {
12391240
// add a separate intrinsic in future, but for now we'll try this.
12401241
if (Opcode == Instruction::Sub)
12411242
IncAmt = Builder.CreateNeg(IncAmt);
1242-
else
1243-
assert(Opcode == Instruction::Add && "only add or sub supported for now");
1243+
assert(isLegalUpdateInstruction(UpdateInst) &&
1244+
"Found Ilegal update instruction for histogram");
12441245

1245-
State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1246+
State.Builder.CreateIntrinsic(getHistogramOpcode(UpdateInst),
12461247
{VTy, IncAmt->getType()},
12471248
{Address, IncAmt, Mask});
12481249
}
@@ -1277,24 +1278,51 @@ InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
12771278
IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
12781279
Type::getVoidTy(Ctx.LLVMCtx),
12791280
{PtrTy, IncTy, MaskTy});
1281+
auto *UpdateInst = getOperand(2)->getUnderlyingValue();
1282+
InstructionCost UpdateCost;
1283+
if (isa<IntrinsicInst>(UpdateInst)) {
1284+
IntrinsicCostAttributes UpdateICA(Opcode, IncTy, {IncTy, IncTy});
1285+
UpdateCost = Ctx.TTI.getIntrinsicInstrCost(UpdateICA, Ctx.CostKind);
1286+
} else
1287+
UpdateCost = Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
12801288

12811289
// Add the costs together with the update operation.
12821290
return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
1283-
Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
1291+
UpdateCost;
12841292
}
12851293

12861294
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
12871295
void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,
12881296
VPSlotTracker &SlotTracker) const {
1297+
auto *UpdateInst = cast<Instruction>(getOperand(2)->getUnderlyingValue());
1298+
assert(isLegalUpdateInstruction(UpdateInst) &&
1299+
"Found Ilegal update instruction for histogram");
12891300
O << Indent << "WIDEN-HISTOGRAM buckets: ";
12901301
getOperand(0)->printAsOperand(O, SlotTracker);
12911302

1292-
if (Opcode == Instruction::Sub)
1293-
O << ", dec: ";
1294-
else {
1295-
assert(Opcode == Instruction::Add);
1296-
O << ", inc: ";
1303+
std::string UpdateMsg;
1304+
if (isa<BinaryOperator>(UpdateInst)) {
1305+
if (Opcode == Instruction::Sub)
1306+
UpdateMsg = ", dec: ";
1307+
else {
1308+
UpdateMsg = ", inc: ";
1309+
}
1310+
} else {
1311+
switch (cast<IntrinsicInst>(UpdateInst)->getIntrinsicID()) {
1312+
case Intrinsic::uadd_sat:
1313+
UpdateMsg = ", saturated inc: ";
1314+
break;
1315+
case Intrinsic::umax:
1316+
UpdateMsg = ", max: ";
1317+
break;
1318+
case Intrinsic::umin:
1319+
UpdateMsg = ", min: ";
1320+
break;
1321+
default:
1322+
llvm_unreachable("Found Ilegal update instruction for histogram");
1323+
}
12971324
}
1325+
O << UpdateMsg;
12981326
getOperand(1)->printAsOperand(O, SlotTracker);
12991327

13001328
if (VPValue *Mask = getMask()) {
@@ -1303,6 +1331,45 @@ void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,
13031331
}
13041332
}
13051333

1334+
/// Returns true if \p I is an update instruction the histogram recipe accepts:
/// an add or sub binary operator, or a call to the uadd.sat, umax or umin
/// intrinsic. Any other instruction yields false.
bool VPHistogramRecipe::isLegalUpdateInstruction(Instruction *I) {
1335+
// We only support add and sub instructions and the following list of
1336+
// intrinsics: uadd.sat, umax, umin.
1337+
// Binary operators: only the Add and Sub opcodes qualify.
if (isa<BinaryOperator>(I))
1338+
return I->getOpcode() == Instruction::Add ||
1339+
I->getOpcode() == Instruction::Sub;
1340+
// Intrinsic calls: accept exactly the three supported intrinsic IDs.
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1341+
switch (II->getIntrinsicID()) {
1342+
case Intrinsic::uadd_sat:
1343+
case Intrinsic::umax:
1344+
case Intrinsic::umin:
1345+
return true;
1346+
default:
1347+
return false;
1348+
}
1349+
}
1350+
// Neither a binary operator nor an intrinsic call.
return false;
1351+
}
1352+
1353+
/// Maps the update instruction \p I to the ID of the histogram intrinsic that
/// implements it. \p I must satisfy isLegalUpdateInstruction; this is
/// asserted, and an unexpected intrinsic ID hits llvm_unreachable.
unsigned VPHistogramRecipe::getHistogramOpcode(Instruction *I) {
1354+
// We only support add and sub instructions and the following list of
1355+
// intrinsics: uadd.sat, umax, umin.
1356+
assert(isLegalUpdateInstruction(I) &&
1357+
"Found Ilegal update instruction for histogram");
1358+
// Both add and sub select the add intrinsic; VPHistogramRecipe::execute
// negates the increment beforehand when the opcode is Sub.
if (isa<BinaryOperator>(I))
1359+
return Intrinsic::experimental_vector_histogram_add;
1360+
// Past this point a legal instruction has to be an intrinsic call, so an
// unchecked cast is used.
auto *II = cast<IntrinsicInst>(I);
1361+
switch (II->getIntrinsicID()) {
1362+
case Intrinsic::uadd_sat:
1363+
return Intrinsic::experimental_vector_histogram_uadd_sat;
1364+
case Intrinsic::umax:
1365+
return Intrinsic::experimental_vector_histogram_umax;
1366+
case Intrinsic::umin:
1367+
return Intrinsic::experimental_vector_histogram_umin;
1368+
default:
1369+
llvm_unreachable("Found Ilegal update instruction for histogram");
1370+
}
1371+
}
1372+
13061373
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
13071374
VPSlotTracker &SlotTracker) const {
13081375
O << Indent << "WIDEN-SELECT ";

0 commit comments

Comments
 (0)