@@ -144,7 +144,9 @@ class MemoryDepChecker {
144144 // on MinDepDistBytes.
145145 BackwardVectorizable,
146146 // Same, but may prevent store-to-load forwarding.
147- BackwardVectorizableButPreventsForwarding
147+ BackwardVectorizableButPreventsForwarding,
148+ // Access is to a loop-loaded value, but is part of a histogram operation.
149+ Histogram
148150 };
149151
150152 // / String version of the types.
@@ -201,7 +203,8 @@ class MemoryDepChecker {
201203 // / Only checks sets with elements in \p CheckDeps.
202204 bool areDepsSafe (DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
203205 const DenseMap<Value *, SmallVector<const Value *, 16 >>
204- &UnderlyingObjects);
206+ &UnderlyingObjects,
207+ const SmallPtrSetImpl<const Value *> &HistogramPtrs);
205208
206209 // / No memory dependence was encountered that would inhibit
207210 // / vectorization.
@@ -352,7 +355,8 @@ class MemoryDepChecker {
352355 isDependent (const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
353356 unsigned BIdx,
354357 const DenseMap<Value *, SmallVector<const Value *, 16 >>
355- &UnderlyingObjects);
358+ &UnderlyingObjects,
359+ const SmallPtrSetImpl<const Value *> &HistogramPtrs);
356360
357361 // / Check whether the data dependence could prevent store-load
358362 // / forwarding.
@@ -393,7 +397,8 @@ class MemoryDepChecker {
393397 const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
394398 Instruction *BInst,
395399 const DenseMap<Value *, SmallVector<const Value *, 16 >>
396- &UnderlyingObjects);
400+ &UnderlyingObjects,
401+ const SmallPtrSetImpl<const Value *> &HistogramPtrs);
397402};
398403
399404class RuntimePointerChecking ;
@@ -445,6 +450,15 @@ struct PointerDiffInfo {
445450 NeedsFreeze (NeedsFreeze) {}
446451};
447452
453+ struct HistogramInfo { // The three instructions that make up one histogram-style operation.
454+ LoadInst *Load; // Load instruction of the histogram operation.
455+ Instruction *Update; // Update instruction of the histogram operation.
456+ StoreInst *Store; // Store instruction of the histogram operation.
457+
458+ HistogramInfo (LoadInst *Load, Instruction *Update, StoreInst *Store)
459+ : Load(Load), Update(Update), Store(Store) {} // Stores the three pointers as given; no validation is performed here.
460+ };
461+
448462// / Holds information about the memory runtime legality checks to verify
449463// / that a group of pointers do not overlap.
450464class RuntimePointerChecking {
@@ -625,6 +639,13 @@ class RuntimePointerChecking {
625639// / Checks for both memory dependences and the SCEV predicates contained in the
626640// / PSE must be emitted in order for the results of this analysis to be valid.
627641class LoopAccessInfo {
642+ // / Represents whether the memory access dependencies in the loop:
643+ // / * Prohibit vectorization
644+ // / * Allow for vectorization (possibly with runtime checks)
645+ // / * Allow for vectorization (possibly with runtime checks),
646+ // / as long as histogram operations are supported.
647+ enum VecMemPossible { CantVec = 0 , NormalVec = 1 , HistogramVec = 2 };
648+
628649public:
629650 LoopAccessInfo (Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
630651 const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
@@ -636,7 +657,11 @@ class LoopAccessInfo {
636657 // / hasStoreStoreDependenceInvolvingLoopInvariantAddress and
637658 // / hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
638659 // / checked.
639- bool canVectorizeMemory () const { return CanVecMem; }
660+ bool canVectorizeMemory () const { return CanVecMem == NormalVec; }
661+
662+ bool canVectorizeMemoryWithHistogram () const { // True if memory dependences allow vectorization, either unconditionally or provided histogram operations are supported.
663+ return CanVecMem == NormalVec || CanVecMem == HistogramVec;
664+ }
640665
641666 // / Return true if there is a convergent operation in the loop. There may
642667 // / still be reported runtime pointer checks that would be required, but it is
@@ -664,6 +689,10 @@ class LoopAccessInfo {
664689 unsigned getNumStores () const { return NumStores; }
665690 unsigned getNumLoads () const { return NumLoads;}
666691
692+ const SmallVectorImpl<HistogramInfo> &getHistograms () const { // Read-only view of the histogram-style operations recorded in Histograms.
693+ return Histograms;
694+ }
695+
667696 // / The diagnostics report generated for the analysis. E.g. why we
668697 // / couldn't analyze the loop.
669698 const OptimizationRemarkAnalysis *getReport () const { return Report.get (); }
@@ -715,8 +744,8 @@ class LoopAccessInfo {
715744private:
716745 /// Analyze the loop. Returns whether the memory accesses in the loop cannot
717746 /// be vectorized, can be vectorized, or can be only with histogram support.
718- bool analyzeLoop (AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI ,
719- DominatorTree *DT);
747+ VecMemPossible analyzeLoop (AAResults *AA, LoopInfo *LI,
748+ const TargetLibraryInfo *TLI, DominatorTree *DT);
720749
721750 // / Check if the structure of the loop allows it to be analyzed by this
722751 // / pass.
@@ -757,7 +786,7 @@ class LoopAccessInfo {
757786 unsigned NumStores = 0 ;
758787
759788 // / Cache the result of analyzeLoop.
760- bool CanVecMem = false ;
789+ VecMemPossible CanVecMem = CantVec ;
761790 bool HasConvergentOp = false ;
762791
763792 // / Indicator that there are two non vectorizable stores to the same uniform
@@ -777,6 +806,13 @@ class LoopAccessInfo {
777806 // / If an access has a symbolic strides, this maps the pointer value to
778807 // / the stride symbol.
779808 DenseMap<Value *, const SCEV *> SymbolicStrides;
809+
810+ // / Holds the load, update, and store instructions for all histogram-style
811+ // / operations found in the loop.
812+ SmallVector<HistogramInfo, 2 > Histograms;
813+
814+ /// The set of pointer values recorded for histogram operations in the loop.
815+ SmallPtrSet<const Value *, 2 > HistogramPtrs;
780816};
781817
782818// / Return the SCEV corresponding to a pointer with the symbolic stride
0 commit comments