22 changes: 22 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3480,6 +3480,17 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
break;
case ISD::VECTOR_COMPRESS: {
SDValue Vec = Op.getOperand(0);
SDValue PassThru = Op.getOperand(2);
Known = computeKnownBits(PassThru, DemandedElts, Depth + 1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
// Compression moves source elements to data-dependent lanes, so any result
// lane may take any element of Vec; query all of Vec's elements.
Known2 = computeKnownBits(Vec, Depth + 1);
Known = Known.intersectWith(Known2);
break;
}
case ISD::VECTOR_SHUFFLE: {
assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every vector element referenced
@@ -4789,6 +4800,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
return Tmp;

case ISD::VECTOR_COMPRESS: {
SDValue Vec = Op.getOperand(0);
SDValue PassThru = Op.getOperand(2);
Tmp = ComputeNumSignBits(PassThru, DemandedElts, Depth + 1);
// The pass-through value alone already limits us to a single sign bit.
if (Tmp == 1)
return 1;
// Compressed elements may land in any demanded lane, so consider all of
// Vec's elements rather than only the demanded ones.
Tmp2 = ComputeNumSignBits(Vec, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
return Tmp;
}

case ISD::VECTOR_SHUFFLE: {
// Collect the minimum number of sign bits that are shared by every vector
// element referenced by the shuffle.
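Both hunks apply the same merge rule: every lane of a VECTOR_COMPRESS result holds either a compressed element of Vec or an element of PassThru, and because compression permutes elements to data-dependent positions, Vec is queried without DemandedElts. The sketch below is an assumed illustration only; the Bits struct and intersectWith helper mimic llvm::KnownBits but are not the real API.

// Standalone model of the two merge rules above.
#include <algorithm>
#include <cassert>
#include <cstdint>

struct Bits {
  uint32_t Zero; // bit i set => bit i of the value is known to be 0
  uint32_t One;  // bit i set => bit i of the value is known to be 1
};

// A fact survives only if it holds for both possible sources of a lane.
static Bits intersectWith(Bits A, Bits B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  // Mirrors the zext tests below: zext i16 -> i32 makes the top 16 bits of
  // every Vec element known zero ...
  Bits Vec = {0xFFFF0000u, 0};
  // ... and "and %passthru, 3" zeroes everything above bit 1.
  Bits PassThru = {0xFFFFFFFCu, 0};
  Bits Known = intersectWith(Vec, PassThru);
  // Both sources agree that bits 31..16 are zero, so a later
  // "and %out, 65535" is a no-op and folds away.
  assert((Known.Zero & 0xFFFF0000u) == 0xFFFF0000u);

  // ComputeNumSignBits uses the analogous rule with std::min: sext i16 -> i32
  // gives 17 sign bits, "and ... 3" gives 30, so the result has at least 17,
  // enough to fold "ashr (shl %out, 16), 16".
  unsigned SignBits = std::min(17u, 30u);
  assert(SignBits >= 17);
  return 0;
}

The tests that follow exercise exactly these two folds on SVE, NEON, and RVV lowerings.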
36 changes: 36 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -100,6 +100,42 @@ define <vscale x 4 x i4> @test_compress_illegal_element_type(<vscale x 4 x i4> %
ret <vscale x 4 x i4> %out
}

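; Known bits: the zext bounds %xvec's top 16 bits to zero and the "and ... 3"
; does the same for the pass-through, so the final "and %out, 65535" is folded
; away.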
define <vscale x 4 x i32> @test_compress_knownbits_zext(<vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) nounwind {
; CHECK-LABEL: test_compress_knownbits_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: cntp x8, p0, p0.s
; CHECK-NEXT: and z1.s, z1.s, #0x3
; CHECK-NEXT: compact z0.s, p0, z0.s
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%xvec = zext <vscale x 4 x i16> %vec to <vscale x 4 x i32>
%xpassthru = and <vscale x 4 x i32> %passthru, splat (i32 3)
%out = call <vscale x 4 x i32> @llvm.experimental.vector.compress(<vscale x 4 x i32> %xvec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %xpassthru)
%res = and <vscale x 4 x i32> %out, splat (i32 65535)
ret <vscale x 4 x i32> %res
}

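; Sign bits: both %xvec (sext from i16) and %xpassthru (and with 3) have at
; least 17 sign bits, so the shl+ashr sign-extension of %out folds away.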
define <vscale x 4 x i32> @test_compress_numsignbits_sext(<vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) nounwind {
; CHECK-LABEL: test_compress_numsignbits_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: and z1.s, z1.s, #0x3
; CHECK-NEXT: cntp x8, p0, p0.s
; CHECK-NEXT: sxth z0.s, p1/m, z0.s
; CHECK-NEXT: compact z0.s, p0, z0.s
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%xvec = sext <vscale x 4 x i16> %vec to <vscale x 4 x i32>
%xpassthru = and <vscale x 4 x i32> %passthru, splat (i32 3)
%out = call <vscale x 4 x i32> @llvm.experimental.vector.compress(<vscale x 4 x i32> %xvec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %xpassthru)
%shl = shl <vscale x 4 x i32> %out, splat (i32 16)
%res = ashr <vscale x 4 x i32> %shl, splat (i32 16)
ret <vscale x 4 x i32> %res
}

define <vscale x 8 x i32> @test_compress_large(<vscale x 8 x i32> %vec, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_compress_large:
; CHECK: // %bb.0:
113 changes: 113 additions & 0 deletions llvm/test/CodeGen/AArch64/vector-compress.ll
@@ -471,3 +471,116 @@ define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i
%out = call <3 x i3> @llvm.experimental.vector.compress(<3 x i3> %vec, <3 x i1> %mask, <3 x i3> undef)
ret <3 x i3> %out
}

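; Fixed-width version of the known-bits test: the compress is expanded via the
; stack, but the final "and %out, 65535" still folds away.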
define <4 x i32> @test_compress_knownbits_zext_v4i16_4i32(<4 x i16> %vec, <4 x i1> %mask, <4 x i32> %passthru) nounwind {
; CHECK-LABEL: test_compress_knownbits_zext_v4i16_4i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ushll.4s v1, v1, #0
; CHECK-NEXT: movi.4s v3, #1
; CHECK-NEXT: mov x14, sp
; CHECK-NEXT: movi.4s v4, #3
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: mov x13, sp
; CHECK-NEXT: mov x12, sp
; CHECK-NEXT: mov x15, sp
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: and.16b v2, v2, v4
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: str q2, [sp]
; CHECK-NEXT: and.16b v3, v1, v3
; CHECK-NEXT: mov.s w8, v1[1]
; CHECK-NEXT: mov.s w9, v1[2]
; CHECK-NEXT: mov.s w10, v1[3]
; CHECK-NEXT: fmov w11, s1
; CHECK-NEXT: addv.4s s1, v3
; CHECK-NEXT: and x16, x11, #0x1
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: bfi x14, x11, #2, #1
; CHECK-NEXT: add x8, x16, x8
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: and x10, x10, #0x1
; CHECK-NEXT: fmov w11, s1
; CHECK-NEXT: add x9, x8, x9
; CHECK-NEXT: mov w16, #3 ; =0x3
; CHECK-NEXT: add x10, x9, x10
; CHECK-NEXT: orr x8, x12, x8, lsl #2
; CHECK-NEXT: bfi x15, x9, #2, #2
; CHECK-NEXT: cmp x10, #3
; CHECK-NEXT: bfi x13, x11, #2, #2
; CHECK-NEXT: mov.s w11, v0[3]
; CHECK-NEXT: csel x9, x10, x16, lo
; CHECK-NEXT: ldr w13, [x13]
; CHECK-NEXT: str s0, [sp]
; CHECK-NEXT: st1.s { v0 }[1], [x14]
; CHECK-NEXT: st1.s { v0 }[2], [x8]
; CHECK-NEXT: orr x8, x12, x9, lsl #2
; CHECK-NEXT: csel w9, w11, w13, hi
; CHECK-NEXT: st1.s { v0 }[3], [x15]
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
entry:
%xvec = zext <4 x i16> %vec to <4 x i32>
%xpassthru = and <4 x i32> %passthru, splat (i32 3)
%out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %xvec, <4 x i1> %mask, <4 x i32> %xpassthru)
%res = and <4 x i32> %out, splat (i32 65535)
ret <4 x i32> %res
}

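; Fixed-width version of the sign-bits test: the shl+ashr pair on %out folds
; away even though the compress itself is scalarized.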
define <4 x i32> @test_compress_numsignbits_sext_v4i16_4i32(<4 x i16> %vec, <4 x i1> %mask, <4 x i32> %passthru) nounwind {
; CHECK-LABEL: test_compress_numsignbits_sext_v4i16_4i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ushll.4s v1, v1, #0
; CHECK-NEXT: movi.4s v3, #1
; CHECK-NEXT: mov x14, sp
; CHECK-NEXT: movi.4s v4, #3
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: mov x13, sp
; CHECK-NEXT: mov x12, sp
; CHECK-NEXT: mov x15, sp
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: and.16b v2, v2, v4
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: str q2, [sp]
; CHECK-NEXT: and.16b v3, v1, v3
; CHECK-NEXT: mov.s w8, v1[1]
; CHECK-NEXT: mov.s w9, v1[2]
; CHECK-NEXT: mov.s w10, v1[3]
; CHECK-NEXT: fmov w11, s1
; CHECK-NEXT: addv.4s s1, v3
; CHECK-NEXT: and x16, x11, #0x1
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: bfi x14, x11, #2, #1
; CHECK-NEXT: add x8, x16, x8
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: and x10, x10, #0x1
; CHECK-NEXT: fmov w11, s1
; CHECK-NEXT: add x9, x8, x9
; CHECK-NEXT: mov w16, #3 ; =0x3
; CHECK-NEXT: add x10, x9, x10
; CHECK-NEXT: orr x8, x12, x8, lsl #2
; CHECK-NEXT: bfi x15, x9, #2, #2
; CHECK-NEXT: cmp x10, #3
; CHECK-NEXT: bfi x13, x11, #2, #2
; CHECK-NEXT: mov.s w11, v0[3]
; CHECK-NEXT: csel x9, x10, x16, lo
; CHECK-NEXT: ldr w13, [x13]
; CHECK-NEXT: str s0, [sp]
; CHECK-NEXT: st1.s { v0 }[1], [x14]
; CHECK-NEXT: st1.s { v0 }[2], [x8]
; CHECK-NEXT: orr x8, x12, x9, lsl #2
; CHECK-NEXT: csel w9, w11, w13, hi
; CHECK-NEXT: st1.s { v0 }[3], [x15]
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
entry:
%xvec = sext <4 x i16> %vec to <4 x i32>
%xpassthru = and <4 x i32> %passthru, splat (i32 3)
%out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %xvec, <4 x i1> %mask, <4 x i32> %xpassthru)
%shl = shl <4 x i32> %out, splat (i32 16)
%res = ashr <4 x i32> %shl, splat (i32 16)
ret <4 x i32> %res
}
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vector-compress.ll
@@ -346,6 +346,39 @@ define <vscale x 4 x i32> @vector_compress_nxv4i32_passthru(<vscale x 4 x i32> %
ret <vscale x 4 x i32> %ret
}

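; Known bits: the final "and %out, 65535" folds away; only the zext
; (vzext.vf2), the pass-through masking, and the compress remain.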
define <vscale x 4 x i32> @test_compress_nxv4i32_knownbits(<vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) nounwind {
; CHECK-LABEL: test_compress_nxv4i32_knownbits:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v12, v8
; CHECK-NEXT: vand.vi v8, v10, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
; CHECK-NEXT: vcompress.vm v8, v12, v0
; CHECK-NEXT: ret
%xvec = zext <vscale x 4 x i16> %vec to <vscale x 4 x i32>
%xpassthru = and <vscale x 4 x i32> %passthru, splat (i32 3)
%out = call <vscale x 4 x i32> @llvm.experimental.vector.compress(<vscale x 4 x i32> %xvec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %xpassthru)
%res = and <vscale x 4 x i32> %out, splat (i32 65535)
ret <vscale x 4 x i32> %res
}

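; Sign bits: the shl+ashr sign-extension of %out folds away, leaving just the
; sext (vsext.vf2), the pass-through masking, and the compress.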
define <vscale x 4 x i32> @test_compress_nxv4i32_numsignbits(<vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) nounwind {
; CHECK-LABEL: test_compress_nxv4i32_numsignbits:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vsext.vf2 v12, v8
; CHECK-NEXT: vand.vi v8, v10, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
; CHECK-NEXT: vcompress.vm v8, v12, v0
; CHECK-NEXT: ret
%xvec = sext <vscale x 4 x i16> %vec to <vscale x 4 x i32>
%xpassthru = and <vscale x 4 x i32> %passthru, splat (i32 3)
%out = call <vscale x 4 x i32> @llvm.experimental.vector.compress(<vscale x 4 x i32> %xvec, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %xpassthru)
%shl = shl <vscale x 4 x i32> %out, splat (i32 16)
%res = ashr <vscale x 4 x i32> %shl, splat (i32 16)
ret <vscale x 4 x i32> %res
}

define <vscale x 8 x i32> @vector_compress_nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: vector_compress_nxv8i32:
; CHECK: # %bb.0: