Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit ec0b9c8

Browse files
author
James Molloy
committed
[VectorUtils] Query number of sign bits to allow more truncations
When deciding whether a vector calculation can be done in a smaller bitwidth, use sign-bit information from ValueTracking to supplement the demanded-bits result and allow more truncations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268921 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 43d2886 commit ec0b9c8

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

lib/Analysis/VectorUtils.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,9 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
320320
SmallPtrSet<Instruction *, 4> InstructionSet;
321321
MapVector<Instruction *, uint64_t> MinBWs;
322322

323+
assert(Blocks.size() > 0 && "Must have at least one block!");
324+
const DataLayout &DL = Blocks[0]->getModule()->getDataLayout();
325+
323326
// Determine the roots. We work bottom-up, from truncs or icmps.
324327
bool SeenExtFromIllegalType = false;
325328
for (auto *BB : Blocks)
@@ -363,12 +366,19 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
363366

364367
// If we encounter a type that is larger than 64 bits, we can't represent
365368
// it so bail out.
366-
if (DB.getDemandedBits(I).getBitWidth() > 64)
369+
APInt NeededBits = DB.getDemandedBits(I);
370+
unsigned BW = NeededBits.getBitWidth();
371+
if (BW > 64)
367372
return MapVector<Instruction *, uint64_t>();
368373

369-
uint64_t V = DB.getDemandedBits(I).getZExtValue();
370-
DBits[Leader] |= V;
371-
DBits[I] = V;
374+
auto NSB = ComputeNumSignBits(I, DL);
375+
376+
// Query demanded bits for the bits required by the instruction. Remove
377+
// any bits that are equal to the sign bit, because we can truncate the
378+
// instruction without changing their value.
379+
NeededBits &= APInt::getLowBitsSet(BW, BW - NSB);
380+
DBits[Leader] |= NeededBits.getZExtValue();
381+
DBits[I] |= NeededBits.getZExtValue();
372382

373383
// Casts, loads and instructions outside of our range terminate a chain
374384
// successfully.

test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,5 +263,41 @@ for.body: ; preds = %entry, %for.body
263263
br i1 %exitcond, label %for.cond.cleanup, label %for.body
264264
}
265265

266+
; CHECK-LABEL: @add_g
267+
; CHECK: load <16 x i8>
268+
; CHECK: xor <16 x i8>
269+
; CHECK: icmp ult <16 x i8>
270+
; CHECK: select <16 x i1> {{.*}}, <16 x i8>
271+
; CHECK: store <16 x i8>
272+
define void @add_g(i8* noalias nocapture readonly %p, i8* noalias nocapture readonly %q, i8* noalias nocapture
273+
%r, i8 %arg1, i32 %len) #0 {
274+
%1 = icmp sgt i32 %len, 0
275+
br i1 %1, label %.lr.ph, label %._crit_edge
276+
277+
.lr.ph: ; preds = %0
278+
%2 = sext i8 %arg1 to i64
279+
br label %3
280+
281+
._crit_edge: ; preds = %3, %0
282+
ret void
283+
284+
; <label>:3 ; preds = %3, %.lr.ph
285+
%indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
286+
%x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
287+
%x5 = load i8, i8* %x4
288+
%x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
289+
%x8 = load i8, i8* %x7
290+
%x9 = zext i8 %x5 to i32
291+
%x10 = xor i32 %x9, 255
292+
%x11 = icmp ult i32 %x10, 24
293+
%x12 = select i1 %x11, i32 %x10, i32 24
294+
%x13 = trunc i32 %x12 to i8
295+
store i8 %x13, i8* %x4
296+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
297+
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
298+
%exitcond = icmp eq i32 %lftr.wideiv, %len
299+
br i1 %exitcond, label %._crit_edge, label %3
300+
}
301+
266302
attributes #0 = { nounwind }
267303

0 commit comments

Comments
 (0)