diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 43a3fbf4d1306..10f96f43f8a07 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -207,10 +207,15 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - // Vectors with an even number of f16 elements will be passed to - // us as an array of v2f16/v2bf16 elements. We must match this so we - // stay in sync with Ins/Outs. - if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0) { + // We require power-of-2 sized vectors because + // TargetLoweringBase::getVectorTypeBreakdown() which is invoked in + // ComputePTXValueVTs() cannot currently break down non-power-of-2 sized + // vectors. + if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0 && + isPowerOf2_32(NumElts)) { + // Vectors with an even number of f16 elements will be passed to + // us as an array of v2f16/v2bf16 elements. We must match this so we + // stay in sync with Ins/Outs. 
switch (EltVT.getSimpleVT().SimpleTy) { case MVT::f16: EltVT = MVT::v2f16; @@ -226,7 +231,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, } NumElts /= 2; } else if (EltVT.getSimpleVT() == MVT::i8 && - (NumElts % 4 == 0 || NumElts == 3)) { + ((NumElts % 4 == 0 && isPowerOf2_32(NumElts)) || + NumElts == 3)) { // v*i8 are formally lowered as v4i8 EltVT = MVT::v4i8; NumElts = (NumElts + 3) / 4; diff --git a/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll b/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll new file mode 100644 index 0000000000000..a88c5637f089b --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +target triple = "nvptx-nvidia-cuda" + +define <6 x half> @half6() { +; CHECK-LABEL: half6( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b16 %rs1, 0x0000; +; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b16 [func_retval0+8], {%rs1, %rs1}; +; CHECK-NEXT: ret; + ret <6 x half> zeroinitializer +} + +define <10 x half> @half10() { +; CHECK-LABEL: half10( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b16 %rs1, 0x0000; +; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b16 [func_retval0+16], {%rs1, %rs1}; +; CHECK-NEXT: ret; + ret <10 x half> zeroinitializer +} + +define <12 x i8> @byte12() { +; CHECK-LABEL: byte12( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 
[func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: ret; + ret <12 x i8> zeroinitializer +} + +define <20 x i8> @byte20() { +; CHECK-LABEL: byte20( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+16], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: ret; + ret <20 x i8> zeroinitializer +} diff --git a/llvm/test/CodeGen/NVPTX/vector-returns.ll b/llvm/test/CodeGen/NVPTX/vector-returns.ll new file mode 100644 index 0000000000000..0d2ad2c9bee75 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/vector-returns.ll @@ -0,0 +1,520 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +target triple = "nvptx-nvidia-cuda" + +define <3 x i64> @long3() { +; CHECK-LABEL: long3( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u64 %rd1, 0; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd1, %rd1}; +; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd1; +; CHECK-NEXT: ret; + ret <3 x i64> zeroinitializer +} + +define <2 x i64> @long2() { +; CHECK-LABEL: long2( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u64 %rd1, 0; +; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd1, %rd1}; +; CHECK-NEXT: ret; + ret <2 x i64> zeroinitializer +} + +define <1 x i64> @long1() { +; CHECK-LABEL: long1( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: 
mov.u64 %rd1, 0; +; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd1; +; CHECK-NEXT: ret; + ret <1 x i64> zeroinitializer +} + +define <5 x i32> @int5() { +; CHECK-LABEL: int5( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: st.param.b32 [func_retval0+16], %r1; +; CHECK-NEXT: ret; + ret <5 x i32> zeroinitializer +} + +define <4 x i32> @int4() { +; CHECK-LABEL: int4( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: ret; + ret <4 x i32> zeroinitializer +} + +define <3 x i32> @int3() { +; CHECK-LABEL: int3( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r1, %r1}; +; CHECK-NEXT: st.param.b32 [func_retval0+8], %r1; +; CHECK-NEXT: ret; + ret <3 x i32> zeroinitializer +} + +define <2 x i32> @int2() { +; CHECK-LABEL: int2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r1, %r1}; +; CHECK-NEXT: ret; + ret <2 x i32> zeroinitializer +} + +define <1 x i32> @int1() { +; CHECK-LABEL: int1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + ret <1 x i32> zeroinitializer +} + +define <9 x i16> @short9() { +; CHECK-LABEL: short9( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: 
st.param.b16 [func_retval0+16], %rs1; +; CHECK-NEXT: ret; + ret <9 x i16> zeroinitializer +} + +define <8 x i16> @short8() { +; CHECK-LABEL: short8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: ret; + ret <8 x i16> zeroinitializer +} + +define <7 x i16> @short7() { +; CHECK-LABEL: short7( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b16 [func_retval0+8], {%rs1, %rs1}; +; CHECK-NEXT: st.param.b16 [func_retval0+12], %rs1; +; CHECK-NEXT: ret; + ret <7 x i16> zeroinitializer +} + +define <5 x i16> @short5() { +; CHECK-LABEL: short5( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b16 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.b16 [func_retval0+8], %rs1; +; CHECK-NEXT: ret; + ret <5 x i16> zeroinitializer +} + +define <4 x i16> @short4() { +; CHECK-LABEL: short4( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r1, %r1}; +; CHECK-NEXT: ret; + ret <4 x i16> zeroinitializer +} + +define <3 x i16> @short3() { +; CHECK-LABEL: short3( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v2.b16 [func_retval0+0], {%rs1, %rs1}; +; CHECK-NEXT: st.param.b16 [func_retval0+4], %rs1; +; CHECK-NEXT: ret; + ret <3 x i16> zeroinitializer +} + +define <2 x i16> @short2() { +; CHECK-LABEL: short2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.b32 
[func_retval0+0], %r1; +; CHECK-NEXT: ret; + ret <2 x i16> zeroinitializer +} + +define <1 x i16> @short1() { +; CHECK-LABEL: short1( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b16 [func_retval0+0], %rs1; +; CHECK-NEXT: ret; + ret <1 x i16> zeroinitializer +} + +define <17 x i8> @byte17() { +; CHECK-LABEL: byte17( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.b8 [func_retval0+16], %rs1; +; CHECK-NEXT: ret; + ret <17 x i8> zeroinitializer +} + +define <16 x i8> @byte16() { +; CHECK-LABEL: byte16( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.v4.b32 [func_retval0+0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: ret; + ret <16 x i8> zeroinitializer +} + +define <15 x i8> @byte15() { +; CHECK-LABEL: byte15( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1}; +; CHECK-NEXT: st.param.b8 [func_retval0+14], %rs1; +; CHECK-NEXT: ret; + ret <15 x i8> zeroinitializer +} + +define <9 x i8> @byte9() { +; CHECK-LABEL: byte9( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: 
st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1; +; CHECK-NEXT: ret; + ret <9 x i8> zeroinitializer +} + +define <8 x i8> @byte8() { +; CHECK-LABEL: byte8( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+0], {%r1, %r1}; +; CHECK-NEXT: ret; + ret <8 x i8> zeroinitializer +} + +define <7 x i8> @byte7() { +; CHECK-LABEL: byte7( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1; +; CHECK-NEXT: ret; + ret <7 x i8> zeroinitializer +} + +define <5 x i8> @byte5() { +; CHECK-LABEL: byte5( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; +; CHECK-NEXT: ret; + ret <5 x i8> zeroinitializer +} + +define <4 x i8> @byte4() { +; CHECK-LABEL: byte4( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + ret <4 x i8> zeroinitializer +} + +define <3 x i8> @byte3() { +; CHECK-LABEL: byte3( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b32 %r1, 0; +; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1; +; CHECK-NEXT: ret; + ret <3 x i8> zeroinitializer +} + +; FIXME: This test causes a crash. 
+; define <2 x i8> @byte2() { +; ret <2 x i8> zeroinitializer +; } + +define <1 x i8> @byte1() { +; CHECK-LABEL: byte1( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: ret; + ret <1 x i8> zeroinitializer +} + +define <17 x i1> @bit17() { +; CHECK-LABEL: bit17( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+0], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.b8 [func_retval0+16], %rs1; +; CHECK-NEXT: ret; + ret <17 x i1> zeroinitializer +} + +define <16 x i1> @bit16() { +; CHECK-LABEL: bit16( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+14], {%rs1, %rs1}; +; CHECK-NEXT: ret; + ret <16 x i1> zeroinitializer +} + +define <15 x i1> @bit15() { +; CHECK-LABEL: bit15( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, 
%rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1}; +; CHECK-NEXT: st.param.b8 [func_retval0+14], %rs1; +; CHECK-NEXT: ret; + ret <15 x i1> zeroinitializer +} + +define <9 x i1> @bit9() { +; CHECK-LABEL: bit9( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1; +; CHECK-NEXT: ret; + ret <9 x i1> zeroinitializer +} + +define <8 x i1> @bit8() { +; CHECK-LABEL: bit8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %rs1; +; CHECK-NEXT: ret; + ret <8 x i1> zeroinitializer +} + +define <7 x i1> @bit7() { +; CHECK-LABEL: bit7( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %rs1; +; 
CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1; +; CHECK-NEXT: ret; + ret <7 x i1> zeroinitializer +} + +define <5 x i1> @bit5() { +; CHECK-LABEL: bit5( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; +; CHECK-NEXT: ret; + ret <5 x i1> zeroinitializer +} + +define <4 x i1> @bit4() { +; CHECK-LABEL: bit4( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; +; CHECK-NEXT: ret; + ret <4 x i1> zeroinitializer +} + +define <3 x i1> @bit3() { +; CHECK-LABEL: bit3( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; +; CHECK-NEXT: ret; + ret <3 x i1> zeroinitializer +} + +define <2 x i1> @bit2() { +; CHECK-LABEL: bit2( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; +; CHECK-NEXT: ret; + ret <2 x i1> zeroinitializer +} + +define <1 x i1> @bit1() { +; CHECK-LABEL: bit1( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u16 %rs1, 0; +; CHECK-NEXT: st.param.b8 [func_retval0+0], %rs1; +; CHECK-NEXT: ret; + ret <1 x i1> zeroinitializer +}