diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 3c6c97bb1fa10..1299582b2f5ea 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4182,7 +4182,7 @@ builtin, the mangler emits their usual pattern without any special treatment.
 -----------------------
 
 ``__builtin_popcountg`` returns the number of 1 bits in the argument. The
-argument can be of any unsigned integer type.
+argument can be of any unsigned integer type or fixed-size boolean vector.
 
 **Syntax**:
 
@@ -4214,7 +4214,13 @@ such as ``unsigned __int128`` and C23 ``unsigned _BitInt(N)``.
 
 ``__builtin_clzg`` (respectively ``__builtin_ctzg``) returns the number of
 leading (respectively trailing) 0 bits in the first argument. The first argument
-can be of any unsigned integer type.
+can be of any unsigned integer type or fixed-size boolean vector.
+
+For boolean vectors, these builtins interpret the vector as a bit-field where
+the i-th element of the vector is bit i of the bit-field, counting from the
+least significant end. ``__builtin_clzg`` returns the number of zero elements at
+the end of the vector, while ``__builtin_ctzg`` returns the number of zero
+elements at the start of the vector.
 
 If the first argument is 0 and an optional second argument of ``int`` type is
 provided, then the second argument is returned. If the first argument is 0, but
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c32102d102cd3..3e495d8dfa136 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -143,6 +143,9 @@ Non-comprehensive list of changes in this release
 - Added ``__builtin_masked_load`` and ``__builtin_masked_store`` for conditional
   memory loads from vectors. Binds to the LLVM intrinsic of the same name.
 
+- The ``__builtin_popcountg``, ``__builtin_ctzg``, and ``__builtin_clzg``
+  functions now accept fixed-size boolean vectors.
+
 - Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and ``ptrauth_intrinsics``
   features has been deprecated, and is restricted to the arm64e target only. The
   correct method to check for these features is to test for the ``__PTRAUTH__``
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2cbebaf7b630e..79040d45cb010 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -141,6 +141,22 @@ static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
   S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr);
 }
 
+static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
+  assert(Val.getFieldDesc()->isPrimitiveArray() &&
+         Val.getFieldDesc()->getElemQualType()->isBooleanType() &&
+         "Not a boolean vector");
+  unsigned NumElems = Val.getNumElems();
+
+  // Each element is one bit, so create an integer with NumElems bits.
+  llvm::APSInt Result(NumElems, 0);
+  for (unsigned I = 0; I != NumElems; ++I) {
+    if (Val.elem<Boolean>(I))
+      Result.setBit(I);
+  }
+
+  return Result;
+}
+
 static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
                                                   const InterpFrame *Frame,
                                                   const CallExpr *Call) {
@@ -638,8 +654,14 @@ static bool interp__builtin_abs(InterpState &S, CodePtr OpPC,
 static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
                                      const InterpFrame *Frame,
                                      const CallExpr *Call) {
-  PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
-  APSInt Val = popToAPSInt(S.Stk, ArgT);
+  APSInt Val;
+  if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
+    const Pointer &Arg = S.Stk.pop<Pointer>();
+    Val = convertBoolVectorToInt(Arg);
+  } else {
+    PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
+    Val = popToAPSInt(S.Stk, ArgT);
+  }
   pushInteger(S, Val.popcount(), Call->getType());
   return true;
 }
@@ -935,8 +957,14 @@ static bool interp__builtin_clz(InterpState &S, CodePtr OpPC,
     PrimType FallbackT = *S.getContext().classify(Call->getArg(1));
     Fallback = popToAPSInt(S.Stk, FallbackT);
   }
-  PrimType ValT = *S.getContext().classify(Call->getArg(0));
-  const APSInt &Val = popToAPSInt(S.Stk, ValT);
+  APSInt Val;
+  if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
+    const Pointer &Arg = S.Stk.pop<Pointer>();
+    Val = convertBoolVectorToInt(Arg);
+  } else {
+    PrimType ValT = *S.getContext().classify(Call->getArg(0));
+    Val = popToAPSInt(S.Stk, ValT);
+  }
 
   // When the argument is 0, the result of GCC builtins is undefined, whereas
   // for Microsoft intrinsics, the result is the bit-width of the argument.
@@ -966,8 +994,14 @@ static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC,
     PrimType FallbackT = *S.getContext().classify(Call->getArg(1));
     Fallback = popToAPSInt(S.Stk, FallbackT);
   }
-  PrimType ValT = *S.getContext().classify(Call->getArg(0));
-  const APSInt &Val = popToAPSInt(S.Stk, ValT);
+  APSInt Val;
+  if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
+    const Pointer &Arg = S.Stk.pop<Pointer>();
+    Val = convertBoolVectorToInt(Arg);
+  } else {
+    PrimType ValT = *S.getContext().classify(Call->getArg(0));
+    Val = popToAPSInt(S.Stk, ValT);
+  }
 
   if (Val == 0) {
     if (Fallback) {
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a03e64fcffde2..e0d1b03dd6d2f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11314,6 +11314,24 @@ static bool EvaluateVector(const Expr* E, APValue& Result, EvalInfo &Info) {
   return VectorExprEvaluator(Info, Result).Visit(E);
 }
 
+static llvm::APInt ConvertBoolVectorToInt(const APValue &Val) {
+  assert(Val.isVector() && "expected vector APValue");
+  unsigned NumElts = Val.getVectorLength();
+
+  // Each element is one bit, so create an integer with NumElts bits.
+  llvm::APInt Result(NumElts, 0);
+
+  for (unsigned I = 0; I < NumElts; ++I) {
+    const APValue &Elt = Val.getVectorElt(I);
+    assert(Elt.isInt() && "expected integer element in bool vector");
+
+    if (Elt.getInt().getBoolValue())
+      Result.setBit(I);
+  }
+
+  return Result;
+}
+
 bool VectorExprEvaluator::VisitCastExpr(const CastExpr *E) {
   const VectorType *VTy = E->getType()->castAs<VectorType>();
   unsigned NElts = VTy->getNumElements();
@@ -13456,8 +13474,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case Builtin::BI__lzcnt:
   case Builtin::BI__lzcnt64: {
     APSInt Val;
-    if (!EvaluateInteger(E->getArg(0), Val, Info))
+    if (E->getArg(0)->getType()->isExtVectorBoolType()) {
+      APValue Vec;
+      if (!EvaluateVector(E->getArg(0), Vec, Info))
+        return false;
+      Val = ConvertBoolVectorToInt(Vec);
+    } else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
       return false;
+    }
 
     std::optional<APSInt> Fallback;
     if ((BuiltinOp == Builtin::BI__builtin_clzg ||
@@ -13542,8 +13566,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case Builtin::BI__builtin_ctzg:
   case Builtin::BI__builtin_elementwise_cttz: {
     APSInt Val;
-    if (!EvaluateInteger(E->getArg(0), Val, Info))
+    if (E->getArg(0)->getType()->isExtVectorBoolType()) {
+      APValue Vec;
+      if (!EvaluateVector(E->getArg(0), Vec, Info))
+        return false;
+      Val = ConvertBoolVectorToInt(Vec);
+    } else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
       return false;
+    }
 
     std::optional<APSInt> Fallback;
     if ((BuiltinOp == Builtin::BI__builtin_ctzg ||
@@ -13758,8 +13788,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case Builtin::BI__popcnt:
   case Builtin::BI__popcnt64: {
     APSInt Val;
-    if (!EvaluateInteger(E->getArg(0), Val, Info))
+    if (E->getArg(0)->getType()->isExtVectorBoolType()) {
+      APValue Vec;
+      if (!EvaluateVector(E->getArg(0), Vec, Info))
+        return false;
+      Val = ConvertBoolVectorToInt(Vec);
+    } else if (!EvaluateInteger(E->getArg(0), Val, Info)) {
       return false;
+    }
 
     return Success(Val.popcount(), E);
   }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d9cc37d123fb4..0979104e945a8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1693,6 +1693,23 @@ getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
   llvm_unreachable("invalid interlocking");
 }
 
+static llvm::Value *EmitBitCountExpr(CodeGenFunction &CGF, const Expr *E) {
+  llvm::Value *ArgValue = CGF.EmitScalarExpr(E);
+  llvm::Type *ArgType = ArgValue->getType();
+
+  // Boolean vectors can be bitcast directly to their bit-field representation.
+  // We intentionally do not round up to the next power-of-two size and instead
+  // let LLVM handle the trailing bits.
+  if (auto *VT = dyn_cast<llvm::FixedVectorType>(ArgType);
+      VT && VT->getElementType()->isIntegerTy(1)) {
+    llvm::Type *StorageType =
+        llvm::Type::getIntNTy(CGF.getLLVMContext(), VT->getNumElements());
+    ArgValue = CGF.Builder.CreateBitCast(ArgValue, StorageType);
+  }
+
+  return ArgValue;
+}
+
 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
 /// bits and a bit position and read and optionally modify the bit at that
 /// position. The position index can be arbitrarily large, i.e. it can be larger
@@ -2020,7 +2037,7 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
   assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
          "Unsupported builtin check kind");
 
-  Value *ArgValue = EmitScalarExpr(E);
+  Value *ArgValue = EmitBitCountExpr(*this, E);
   if (!SanOpts.has(SanitizerKind::Builtin))
     return ArgValue;
 
@@ -3334,7 +3351,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                         E->getNumArgs() > 1;
 
     Value *ArgValue =
-        HasFallback ? EmitScalarExpr(E->getArg(0))
+        HasFallback ? EmitBitCountExpr(*this, E->getArg(0))
                     : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
 
     llvm::Type *ArgType = ArgValue->getType();
@@ -3371,7 +3388,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                         E->getNumArgs() > 1;
 
     Value *ArgValue =
-        HasFallback ? EmitScalarExpr(E->getArg(0))
+        HasFallback ? EmitBitCountExpr(*this, E->getArg(0))
                     : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
 
     llvm::Type *ArgType = ArgValue->getType();
@@ -3456,7 +3473,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_popcountl:
   case Builtin::BI__builtin_popcountll:
   case Builtin::BI__builtin_popcountg: {
-    Value *ArgValue = EmitScalarExpr(E->getArg(0));
+    Value *ArgValue = EmitBitCountExpr(*this, E->getArg(0));
 
     llvm::Type *ArgType = ArgValue->getType();
     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2944c1a09b32c..e343d77503cc2 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2214,7 +2214,7 @@ static bool BuiltinPopcountg(Sema &S, CallExpr *TheCall) {
 
   QualType ArgTy = Arg->getType();
 
-  if (!ArgTy->isUnsignedIntegerType()) {
+  if (!ArgTy->isUnsignedIntegerType() && !ArgTy->isExtVectorBoolType()) {
     S.Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
         << 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
         << ArgTy;
@@ -2239,7 +2239,7 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) {
 
   QualType Arg0Ty = Arg0->getType();
 
-  if (!Arg0Ty->isUnsignedIntegerType()) {
+  if (!Arg0Ty->isUnsignedIntegerType() && !Arg0Ty->isExtVectorBoolType()) {
     S.Diag(Arg0->getBeginLoc(), diag::err_builtin_invalid_arg_type)
         << 1 << /* scalar */ 1 << /* unsigned integer ty */ 3 << /* no fp */ 0
         << Arg0Ty;
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 3277ef65a880b..f47bc49d9a1a8 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -454,6 +454,7 @@ namespace SourceLocation {
 }
 
 #define BITSIZE(x) (sizeof(x) * 8)
+constexpr bool __attribute__((ext_vector_type(4))) v4b{};
 namespace popcount {
   static_assert(__builtin_popcount(~0u) == __CHAR_BIT__ * sizeof(unsigned int), "");
   static_assert(__builtin_popcount(0) == 0, "");
@@ -471,6 +472,7 @@ namespace popcount {
   static_assert(__builtin_popcountg(0ul) == 0, "");
   static_assert(__builtin_popcountg(~0ull) == __CHAR_BIT__ * sizeof(unsigned long long), "");
   static_assert(__builtin_popcountg(0ull) == 0, "");
+  static_assert(__builtin_popcountg(v4b) == 0, "");
 #ifdef __SIZEOF_INT128__
   static_assert(__builtin_popcountg(~(unsigned __int128)0) == __CHAR_BIT__ * sizeof(unsigned __int128), "");
   static_assert(__builtin_popcountg((unsigned __int128)0) == 0, "");
@@ -743,6 +745,7 @@ namespace clz {
   char clz62[__builtin_clzg((unsigned _BitInt(128))0xf) ==
BITSIZE(_BitInt(128)) - 4 ? 1 : -1]; char clz63[__builtin_clzg((unsigned _BitInt(128))0xf, 42) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1]; #endif + char clz64[__builtin_clzg(v4b, 0) == 0 ? 1 : -1]; } namespace ctz { @@ -813,6 +816,7 @@ namespace ctz { char ctz62[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1)) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; char ctz63[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1), 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; #endif + char clz64[__builtin_ctzg(v4b, 0) == 0 ? 1 : -1]; } namespace bswap { diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c index aa9965b815983..7ad143ed165c8 100644 --- a/clang/test/CodeGen/builtins.c +++ b/clang/test/CodeGen/builtins.c @@ -991,247 +991,288 @@ void test_builtin_os_log_long_double(void *buf, long double ld) { void test_builtin_popcountg(unsigned char uc, unsigned short us, unsigned int ui, unsigned long ul, unsigned long long ull, unsigned __int128 ui128, - unsigned _BitInt(128) ubi128) { + unsigned _BitInt(128) ubi128, + _Bool __attribute__((ext_vector_type(8))) vb8) { volatile int pop; - pop = __builtin_popcountg(uc); - // CHECK: %1 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %2 = call i8 @llvm.ctpop.i8(i8 %1) - // CHECK-NEXT: %cast = zext i8 %2 to i32 + // CHECK: %2 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %3 = call i8 @llvm.ctpop.i8(i8 %2) + // CHECK-NEXT: %cast = zext i8 %3 to i32 // CHECK-NEXT: store volatile i32 %cast, ptr %pop, align 4 + pop = __builtin_popcountg(uc); + // CHECK: %4 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %5 = call i16 @llvm.ctpop.i16(i16 %4) + // CHECK-NEXT: %cast2 = zext i16 %5 to i32 + // CHECK-NEXT: store volatile i32 %cast2, ptr %pop, align 4 pop = __builtin_popcountg(us); - // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %4 = call i16 @llvm.ctpop.i16(i16 %3) - // CHECK-NEXT: %cast1 = zext i16 %4 to i32 - // CHECK-NEXT: store volatile i32 %cast1, ptr %pop, align 4 + // CHECK: %6 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %7 = call i32 @llvm.ctpop.i32(i32 %6) + // CHECK-NEXT: store volatile i32 %7, ptr %pop, align 4 pop = __builtin_popcountg(ui); - // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %6 = call i32 @llvm.ctpop.i32(i32 %5) - // CHECK-NEXT: store volatile i32 %6, ptr %pop, align 4 + // CHECK: %8 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %9 = call i64 @llvm.ctpop.i64(i64 %8) + // CHECK-NEXT: %cast3 = trunc i64 %9 to i32 + // CHECK-NEXT: store volatile i32 %cast3, ptr %pop, align 4 pop = __builtin_popcountg(ul); - // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %8 = call i64 @llvm.ctpop.i64(i64 %7) - // CHECK-NEXT: %cast2 = trunc i64 %8 to i32 - // CHECK-NEXT: store volatile i32 %cast2, ptr %pop, align 4 + // CHECK: %10 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %11 = call i64 @llvm.ctpop.i64(i64 %10) + // CHECK-NEXT: %cast4 = trunc i64 %11 to i32 + // CHECK-NEXT: store volatile i32 %cast4, ptr %pop, align 4 pop = __builtin_popcountg(ull); - // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8 - // CHECK-NEXT: %10 = call i64 @llvm.ctpop.i64(i64 %9) - // CHECK-NEXT: %cast3 = trunc i64 %10 to i32 - // CHECK-NEXT: store volatile i32 %cast3, ptr %pop, align 4 + // CHECK: %12 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %13 = call i128 @llvm.ctpop.i128(i128 %12) + // CHECK-NEXT: %cast5 = trunc i128 %13 to i32 + // CHECK-NEXT: store volatile i32 %cast5, ptr %pop, align 4 pop = 
__builtin_popcountg(ui128); - // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %12 = call i128 @llvm.ctpop.i128(i128 %11) - // CHECK-NEXT: %cast4 = trunc i128 %12 to i32 - // CHECK-NEXT: store volatile i32 %cast4, ptr %pop, align 4 + // CHECK: %14 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %15 = call i128 @llvm.ctpop.i128(i128 %14) + // CHECK-NEXT: %cast6 = trunc i128 %15 to i32 + // CHECK-NEXT: store volatile i32 %cast6, ptr %pop, align 4 pop = __builtin_popcountg(ubi128); - // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %14 = call i128 @llvm.ctpop.i128(i128 %13) - // CHECK-NEXT: %cast5 = trunc i128 %14 to i32 - // CHECK-NEXT: store volatile i32 %cast5, ptr %pop, align 4 - // CHECK-NEXT: ret void + // CHECK: %load_bits7 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1> + // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8 + // CHECK-NEXT: %18 = call i8 @llvm.ctpop.i8(i8 %17) + // CHECK-NEXT: %cast8 = zext i8 %18 to i32 + // CHECK-NEXT: store volatile i32 %cast8, ptr %pop, align 4 + pop = __builtin_popcountg(vb8); } // CHECK-LABEL: define{{.*}} void @test_builtin_clzg void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui, unsigned long ul, unsigned long long ull, unsigned __int128 ui128, unsigned _BitInt(128) ubi128, - signed char sc, short s, int i) { + signed char sc, short s, int i, + _Bool __attribute__((ext_vector_type(8))) vb8) { volatile int lz; + // CHECK: %2 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %3 = call i8 @llvm.ctlz.i8(i8 %2, i1 true) + // CHECK-NEXT: %cast = zext i8 %3 to i32 + // CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4 lz = __builtin_clzg(uc); - // CHECK: %1 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %2 = call i8 @llvm.ctlz.i8(i8 %1, i1 true) - // CHECK-NEXT: %cast = zext i8 %2 to i32 - // CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4 + // CHECK-NEXT: %4 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %5 = call i16 @llvm.ctlz.i16(i16 %4, i1 true) + // CHECK-NEXT: %cast2 = zext i16 %5 to i32 + // CHECK-NEXT: store volatile i32 %cast2, ptr %lz, align 4 lz = __builtin_clzg(us); - // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %4 = call i16 @llvm.ctlz.i16(i16 %3, i1 true) - // CHECK-NEXT: %cast1 = zext i16 %4 to i32 - // CHECK-NEXT: store volatile i32 %cast1, ptr %lz, align 4 + // CHECK-NEXT: %6 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %7 = call i32 @llvm.ctlz.i32(i32 %6, i1 true) + // CHECK-NEXT: store volatile i32 %7, ptr %lz, align 4 lz = __builtin_clzg(ui); - // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %6 = call i32 @llvm.ctlz.i32(i32 %5, i1 true) - // CHECK-NEXT: store volatile i32 %6, ptr %lz, align 4 + // CHECK-NEXT: %8 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %9 = call i64 @llvm.ctlz.i64(i64 %8, i1 true) + // CHECK-NEXT: %cast3 = trunc i64 %9 to i32 + // CHECK-NEXT: store volatile i32 %cast3, ptr %lz, align 4 lz = __builtin_clzg(ul); - // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %8 = call i64 @llvm.ctlz.i64(i64 %7, i1 true) - // CHECK-NEXT: %cast2 = trunc i64 %8 to i32 - // CHECK-NEXT: store volatile i32 %cast2, ptr %lz, align 4 + // CHECK-NEXT: %10 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %11 = call i64 @llvm.ctlz.i64(i64 %10, i1 true) + // CHECK-NEXT: %cast4 = trunc i64 %11 to i32 + // CHECK-NEXT: store volatile i32 %cast4, ptr %lz, align 4 lz = __builtin_clzg(ull); - // CHECK-NEXT: %9 = load i64, 
ptr %ull.addr, align 8 - // CHECK-NEXT: %10 = call i64 @llvm.ctlz.i64(i64 %9, i1 true) - // CHECK-NEXT: %cast3 = trunc i64 %10 to i32 - // CHECK-NEXT: store volatile i32 %cast3, ptr %lz, align 4 + // CHECK-NEXT: %12 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %13 = call i128 @llvm.ctlz.i128(i128 %12, i1 true) + // CHECK-NEXT: %cast5 = trunc i128 %13 to i32 + // CHECK-NEXT: store volatile i32 %cast5, ptr %lz, align 4 lz = __builtin_clzg(ui128); - // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %12 = call i128 @llvm.ctlz.i128(i128 %11, i1 true) - // CHECK-NEXT: %cast4 = trunc i128 %12 to i32 - // CHECK-NEXT: store volatile i32 %cast4, ptr %lz, align 4 + // CHECK-NEXT: %14 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %15 = call i128 @llvm.ctlz.i128(i128 %14, i1 true) + // CHECK-NEXT: %cast6 = trunc i128 %15 to i32 + // CHECK-NEXT: store volatile i32 %cast6, ptr %lz, align 4 lz = __builtin_clzg(ubi128); - // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %14 = call i128 @llvm.ctlz.i128(i128 %13, i1 true) - // CHECK-NEXT: %cast5 = trunc i128 %14 to i32 - // CHECK-NEXT: store volatile i32 %cast5, ptr %lz, align 4 + // CHECK-NEXT: %load_bits7 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1> + // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8 + // CHECK-NEXT: %18 = call i8 @llvm.ctlz.i8(i8 %17, i1 true) + // CHECK-NEXT: %cast8 = zext i8 %18 to i32 + // CHECK-NEXT: store volatile i32 %cast8, ptr %lz, align 4 + lz = __builtin_clzg(vb8); + // CHECK-NEXT: %19 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %20 = call i8 @llvm.ctlz.i8(i8 %19, i1 true) + // CHECK-NEXT: %cast9 = zext i8 %20 to i32 + // CHECK-NEXT: %iszero = icmp eq i8 %19, 0 + // CHECK-NEXT: %21 = load i8, ptr %sc.addr, align 1 + // CHECK-NEXT: %conv = sext i8 %21 to i32 + // CHECK-NEXT: %clzg = select i1 %iszero, i32 %conv, i32 %cast9 + // CHECK-NEXT: store volatile i32 %clzg, ptr %lz, align 4 lz = __builtin_clzg(uc, sc); - // CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %16 = call i8 @llvm.ctlz.i8(i8 %15, i1 true) - // CHECK-NEXT: %cast6 = zext i8 %16 to i32 - // CHECK-NEXT: %iszero = icmp eq i8 %15, 0 - // CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1 - // CHECK-NEXT: %conv = sext i8 %17 to i32 - // CHECK-NEXT: %clzg = select i1 %iszero, i32 %conv, i32 %cast6 - // CHECK-NEXT: store volatile i32 %clzg, ptr %lz, align 4 + // CHECK-NEXT: %22 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %23 = call i16 @llvm.ctlz.i16(i16 %22, i1 true) + // CHECK-NEXT: %cast10 = zext i16 %23 to i32 + // CHECK-NEXT: %iszero11 = icmp eq i16 %22, 0 + // CHECK-NEXT: %24 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %conv12 = zext i8 %24 to i32 + // CHECK-NEXT: %clzg13 = select i1 %iszero11, i32 %conv12, i32 %cast10 + // CHECK-NEXT: store volatile i32 %clzg13, ptr %lz, align 4 lz = __builtin_clzg(us, uc); - // CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %19 = call i16 @llvm.ctlz.i16(i16 %18, i1 true) - // CHECK-NEXT: %cast7 = zext i16 %19 to i32 - // CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0 - // CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %conv9 = zext i8 %20 to i32 - // CHECK-NEXT: %clzg10 = select i1 %iszero8, i32 %conv9, i32 %cast7 - // CHECK-NEXT: store volatile i32 %clzg10, ptr %lz, align 4 + // CHECK-NEXT: %25 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %26 = call i32 @llvm.ctlz.i32(i32 %25, i1 true) + // CHECK-NEXT: %iszero14 = icmp eq i32 %25, 0 + // 
CHECK-NEXT: %27 = load i16, ptr %s.addr, align 2 + // CHECK-NEXT: %conv15 = sext i16 %27 to i32 + // CHECK-NEXT: %clzg16 = select i1 %iszero14, i32 %conv15, i32 %26 + // CHECK-NEXT: store volatile i32 %clzg16, ptr %lz, align 4 lz = __builtin_clzg(ui, s); - // CHECK-NEXT: %21 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %22 = call i32 @llvm.ctlz.i32(i32 %21, i1 true) - // CHECK-NEXT: %iszero11 = icmp eq i32 %21, 0 - // CHECK-NEXT: %23 = load i16, ptr %s.addr, align 2 - // CHECK-NEXT: %conv12 = sext i16 %23 to i32 - // CHECK-NEXT: %clzg13 = select i1 %iszero11, i32 %conv12, i32 %22 - // CHECK-NEXT: store volatile i32 %clzg13, ptr %lz, align 4 + // CHECK-NEXT: %28 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %29 = call i64 @llvm.ctlz.i64(i64 %28, i1 true) + // CHECK-NEXT: %cast17 = trunc i64 %29 to i32 + // CHECK-NEXT: %iszero18 = icmp eq i64 %28, 0 + // CHECK-NEXT: %30 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %conv19 = zext i16 %30 to i32 + // CHECK-NEXT: %clzg20 = select i1 %iszero18, i32 %conv19, i32 %cast17 + // CHECK-NEXT: store volatile i32 %clzg20, ptr %lz, align 4 lz = __builtin_clzg(ul, us); - // CHECK-NEXT: %24 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %25 = call i64 @llvm.ctlz.i64(i64 %24, i1 true) - // CHECK-NEXT: %cast14 = trunc i64 %25 to i32 - // CHECK-NEXT: %iszero15 = icmp eq i64 %24, 0 - // CHECK-NEXT: %26 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %conv16 = zext i16 %26 to i32 - // CHECK-NEXT: %clzg17 = select i1 %iszero15, i32 %conv16, i32 %cast14 - // CHECK-NEXT: store volatile i32 %clzg17, ptr %lz, align 4 + // CHECK-NEXT: %31 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %32 = call i64 @llvm.ctlz.i64(i64 %31, i1 true) + // CHECK-NEXT: %cast21 = trunc i64 %32 to i32 + // CHECK-NEXT: %iszero22 = icmp eq i64 %31, 0 + // CHECK-NEXT: %33 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %clzg23 = select i1 %iszero22, i32 %33, i32 %cast21 + // CHECK-NEXT: store volatile i32 %clzg23, ptr %lz, align 4 lz = __builtin_clzg(ull, i); - // CHECK-NEXT: %27 = load i64, ptr %ull.addr, align 8 - // CHECK-NEXT: %28 = call i64 @llvm.ctlz.i64(i64 %27, i1 true) - // CHECK-NEXT: %cast18 = trunc i64 %28 to i32 - // CHECK-NEXT: %iszero19 = icmp eq i64 %27, 0 - // CHECK-NEXT: %29 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %clzg20 = select i1 %iszero19, i32 %29, i32 %cast18 - // CHECK-NEXT: store volatile i32 %clzg20, ptr %lz, align 4 + // CHECK-NEXT: %34 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %35 = call i128 @llvm.ctlz.i128(i128 %34, i1 true) + // CHECK-NEXT: %cast24 = trunc i128 %35 to i32 + // CHECK-NEXT: %iszero25 = icmp eq i128 %34, 0 + // CHECK-NEXT: %36 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %clzg26 = select i1 %iszero25, i32 %36, i32 %cast24 + // CHECK-NEXT: store volatile i32 %clzg26, ptr %lz, align 4 lz = __builtin_clzg(ui128, i); - // CHECK-NEXT: %30 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %31 = call i128 @llvm.ctlz.i128(i128 %30, i1 true) - // CHECK-NEXT: %cast21 = trunc i128 %31 to i32 - // CHECK-NEXT: %iszero22 = icmp eq i128 %30, 0 - // CHECK-NEXT: %32 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %clzg23 = select i1 %iszero22, i32 %32, i32 %cast21 - // CHECK-NEXT: store volatile i32 %clzg23, ptr %lz, align 4 + // CHECK-NEXT: %37 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %38 = call i128 @llvm.ctlz.i128(i128 %37, i1 true) + // CHECK-NEXT: %cast27 = trunc i128 %38 to i32 + // CHECK-NEXT: %iszero28 = icmp eq i128 %37, 0 + // CHECK-NEXT: %39 = load i32, ptr %i.addr, align 
4 + // CHECK-NEXT: %clzg29 = select i1 %iszero28, i32 %39, i32 %cast27 + // CHECK-NEXT: store volatile i32 %clzg29, ptr %lz, align 4 lz = __builtin_clzg(ubi128, i); - // CHECK-NEXT: %33 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %34 = call i128 @llvm.ctlz.i128(i128 %33, i1 true) - // CHECK-NEXT: %cast24 = trunc i128 %34 to i32 - // CHECK-NEXT: %iszero25 = icmp eq i128 %33, 0 - // CHECK-NEXT: %35 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %clzg26 = select i1 %iszero25, i32 %35, i32 %cast24 - // CHECK-NEXT: store volatile i32 %clzg26, ptr %lz, align 4 - // CHECK-NEXT: ret void + // CHECK-NEXT: %load_bits30 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %40 = bitcast i8 %load_bits30 to <8 x i1> + // CHECK-NEXT: %41 = bitcast <8 x i1> %40 to i8 + // CHECK-NEXT: %42 = call i8 @llvm.ctlz.i8(i8 %41, i1 true) + // CHECK-NEXT: %cast31 = zext i8 %42 to i32 + // CHECK-NEXT: %iszero32 = icmp eq i8 %41, 0 + // CHECK-NEXT: %43 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %clzg33 = select i1 %iszero32, i32 %43, i32 %cast31 + // CHECK-NEXT: store volatile i32 %clzg33, ptr %lz, align 4 + lz = __builtin_clzg(vb8, i); } // CHECK-LABEL: define{{.*}} void @test_builtin_ctzg void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui, unsigned long ul, unsigned long long ull, unsigned __int128 ui128, unsigned _BitInt(128) ubi128, - signed char sc, short s, int i) { + signed char sc, short s, int i, + _Bool __attribute__((ext_vector_type(8))) vb8) { volatile int tz; - tz = __builtin_ctzg(uc); - // CHECK: %1 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %2 = call i8 @llvm.cttz.i8(i8 %1, i1 true) - // CHECK-NEXT: %cast = zext i8 %2 to i32 + // CHECK: %2 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %3 = call i8 @llvm.cttz.i8(i8 %2, i1 true) + // CHECK-NEXT: %cast = zext i8 %3 to i32 // CHECK-NEXT: store volatile i32 %cast, ptr %tz, align 4 + tz = __builtin_ctzg(uc); + // CHECK-NEXT: %4 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %5 = call i16 @llvm.cttz.i16(i16 %4, i1 true) + // CHECK-NEXT: %cast2 = zext i16 %5 to i32 + // CHECK-NEXT: store volatile i32 %cast2, ptr %tz, align 4 tz = __builtin_ctzg(us); - // CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %4 = call i16 @llvm.cttz.i16(i16 %3, i1 true) - // CHECK-NEXT: %cast1 = zext i16 %4 to i32 - // CHECK-NEXT: store volatile i32 %cast1, ptr %tz, align 4 + // CHECK-NEXT: %6 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %7 = call i32 @llvm.cttz.i32(i32 %6, i1 true) + // CHECK-NEXT: store volatile i32 %7, ptr %tz, align 4 tz = __builtin_ctzg(ui); - // CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %6 = call i32 @llvm.cttz.i32(i32 %5, i1 true) - // CHECK-NEXT: store volatile i32 %6, ptr %tz, align 4 + // CHECK-NEXT: %8 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %9 = call i64 @llvm.cttz.i64(i64 %8, i1 true) + // CHECK-NEXT: %cast3 = trunc i64 %9 to i32 + // CHECK-NEXT: store volatile i32 %cast3, ptr %tz, align 4 tz = __builtin_ctzg(ul); - // CHECK-NEXT: %7 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %8 = call i64 @llvm.cttz.i64(i64 %7, i1 true) - // CHECK-NEXT: %cast2 = trunc i64 %8 to i32 - // CHECK-NEXT: store volatile i32 %cast2, ptr %tz, align 4 + // CHECK-NEXT: %10 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %11 = call i64 @llvm.cttz.i64(i64 %10, i1 true) + // CHECK-NEXT: %cast4 = trunc i64 %11 to i32 + // CHECK-NEXT: store volatile i32 %cast4, ptr %tz, align 4 tz = __builtin_ctzg(ull); - // CHECK-NEXT: %9 = load i64, ptr %ull.addr, align 8 
- // CHECK-NEXT: %10 = call i64 @llvm.cttz.i64(i64 %9, i1 true) - // CHECK-NEXT: %cast3 = trunc i64 %10 to i32 - // CHECK-NEXT: store volatile i32 %cast3, ptr %tz, align 4 + // CHECK-NEXT: %12 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %13 = call i128 @llvm.cttz.i128(i128 %12, i1 true) + // CHECK-NEXT: %cast5 = trunc i128 %13 to i32 + // CHECK-NEXT: store volatile i32 %cast5, ptr %tz, align 4 tz = __builtin_ctzg(ui128); - // CHECK-NEXT: %11 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %12 = call i128 @llvm.cttz.i128(i128 %11, i1 true) - // CHECK-NEXT: %cast4 = trunc i128 %12 to i32 - // CHECK-NEXT: store volatile i32 %cast4, ptr %tz, align 4 + // CHECK-NEXT: %14 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %15 = call i128 @llvm.cttz.i128(i128 %14, i1 true) + // CHECK-NEXT: %cast6 = trunc i128 %15 to i32 + // CHECK-NEXT: store volatile i32 %cast6, ptr %tz, align 4 tz = __builtin_ctzg(ubi128); - // CHECK-NEXT: %13 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %14 = call i128 @llvm.cttz.i128(i128 %13, i1 true) - // CHECK-NEXT: %cast5 = trunc i128 %14 to i32 - // CHECK-NEXT: store volatile i32 %cast5, ptr %tz, align 4 - tz = __builtin_ctzg(uc, sc); - // CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %16 = call i8 @llvm.cttz.i8(i8 %15, i1 true) - // CHECK-NEXT: %cast6 = zext i8 %16 to i32 - // CHECK-NEXT: %iszero = icmp eq i8 %15, 0 - // CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1 - // CHECK-NEXT: %conv = sext i8 %17 to i32 - // CHECK-NEXT: %ctzg = select i1 %iszero, i32 %conv, i32 %cast6 + // CHECK-NEXT: %load_bits7 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %16 = bitcast i8 %load_bits7 to <8 x i1> + // CHECK-NEXT: %17 = bitcast <8 x i1> %16 to i8 + // CHECK-NEXT: %18 = call i8 @llvm.cttz.i8(i8 %17, i1 true) + // CHECK-NEXT: %cast8 = zext i8 %18 to i32 + // CHECK-NEXT: store volatile i32 %cast8, ptr %tz, align 4 + tz = __builtin_ctzg(vb8); + // CHECK-NEXT: %19 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %20 = call i8 @llvm.cttz.i8(i8 %19, i1 true) + // CHECK-NEXT: %cast9 = zext i8 %20 to i32 + // CHECK-NEXT: %iszero = icmp eq i8 %19, 0 + // CHECK-NEXT: %21 = load i8, ptr %sc.addr, align 1 + // CHECK-NEXT: %conv = sext i8 %21 to i32 + // CHECK-NEXT: %ctzg = select i1 %iszero, i32 %conv, i32 %cast9 // CHECK-NEXT: store volatile i32 %ctzg, ptr %tz, align 4 + tz = __builtin_ctzg(uc, sc); + // CHECK-NEXT: %22 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %23 = call i16 @llvm.cttz.i16(i16 %22, i1 true) + // CHECK-NEXT: %cast10 = zext i16 %23 to i32 + // CHECK-NEXT: %iszero11 = icmp eq i16 %22, 0 + // CHECK-NEXT: %24 = load i8, ptr %uc.addr, align 1 + // CHECK-NEXT: %conv12 = zext i8 %24 to i32 + // CHECK-NEXT: %ctzg13 = select i1 %iszero11, i32 %conv12, i32 %cast10 + // CHECK-NEXT: store volatile i32 %ctzg13, ptr %tz, align 4 tz = __builtin_ctzg(us, uc); - // CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %19 = call i16 @llvm.cttz.i16(i16 %18, i1 true) - // CHECK-NEXT: %cast7 = zext i16 %19 to i32 - // CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0 - // CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1 - // CHECK-NEXT: %conv9 = zext i8 %20 to i32 - // CHECK-NEXT: %ctzg10 = select i1 %iszero8, i32 %conv9, i32 %cast7 - // CHECK-NEXT: store volatile i32 %ctzg10, ptr %tz, align 4 + // CHECK-NEXT: %25 = load i32, ptr %ui.addr, align 4 + // CHECK-NEXT: %26 = call i32 @llvm.cttz.i32(i32 %25, i1 true) + // CHECK-NEXT: %iszero14 = icmp eq i32 %25, 0 + // CHECK-NEXT: %27 = load i16, ptr %s.addr, align 2 + // 
CHECK-NEXT: %conv15 = sext i16 %27 to i32 + // CHECK-NEXT: %ctzg16 = select i1 %iszero14, i32 %conv15, i32 %26 + // CHECK-NEXT: store volatile i32 %ctzg16, ptr %tz, align 4 tz = __builtin_ctzg(ui, s); - // CHECK-NEXT: %21 = load i32, ptr %ui.addr, align 4 - // CHECK-NEXT: %22 = call i32 @llvm.cttz.i32(i32 %21, i1 true) - // CHECK-NEXT: %iszero11 = icmp eq i32 %21, 0 - // CHECK-NEXT: %23 = load i16, ptr %s.addr, align 2 - // CHECK-NEXT: %conv12 = sext i16 %23 to i32 - // CHECK-NEXT: %ctzg13 = select i1 %iszero11, i32 %conv12, i32 %22 - // CHECK-NEXT: store volatile i32 %ctzg13, ptr %tz, align 4 + // CHECK-NEXT: %28 = load i64, ptr %ul.addr, align 8 + // CHECK-NEXT: %29 = call i64 @llvm.cttz.i64(i64 %28, i1 true) + // CHECK-NEXT: %cast17 = trunc i64 %29 to i32 + // CHECK-NEXT: %iszero18 = icmp eq i64 %28, 0 + // CHECK-NEXT: %30 = load i16, ptr %us.addr, align 2 + // CHECK-NEXT: %conv19 = zext i16 %30 to i32 + // CHECK-NEXT: %ctzg20 = select i1 %iszero18, i32 %conv19, i32 %cast17 + // CHECK-NEXT: store volatile i32 %ctzg20, ptr %tz, align 4 tz = __builtin_ctzg(ul, us); - // CHECK-NEXT: %24 = load i64, ptr %ul.addr, align 8 - // CHECK-NEXT: %25 = call i64 @llvm.cttz.i64(i64 %24, i1 true) - // CHECK-NEXT: %cast14 = trunc i64 %25 to i32 - // CHECK-NEXT: %iszero15 = icmp eq i64 %24, 0 - // CHECK-NEXT: %26 = load i16, ptr %us.addr, align 2 - // CHECK-NEXT: %conv16 = zext i16 %26 to i32 - // CHECK-NEXT: %ctzg17 = select i1 %iszero15, i32 %conv16, i32 %cast14 - // CHECK-NEXT: store volatile i32 %ctzg17, ptr %tz, align 4 + // CHECK-NEXT: %31 = load i64, ptr %ull.addr, align 8 + // CHECK-NEXT: %32 = call i64 @llvm.cttz.i64(i64 %31, i1 true) + // CHECK-NEXT: %cast21 = trunc i64 %32 to i32 + // CHECK-NEXT: %iszero22 = icmp eq i64 %31, 0 + // CHECK-NEXT: %33 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %ctzg23 = select i1 %iszero22, i32 %33, i32 %cast21 + // CHECK-NEXT: store volatile i32 %ctzg23, ptr %tz, align 4 tz = __builtin_ctzg(ull, i); - // CHECK-NEXT: %27 = load i64, ptr %ull.addr, align 8 - // CHECK-NEXT: %28 = call i64 @llvm.cttz.i64(i64 %27, i1 true) - // CHECK-NEXT: %cast18 = trunc i64 %28 to i32 - // CHECK-NEXT: %iszero19 = icmp eq i64 %27, 0 - // CHECK-NEXT: %29 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %ctzg20 = select i1 %iszero19, i32 %29, i32 %cast18 - // CHECK-NEXT: store volatile i32 %ctzg20, ptr %tz, align 4 + // CHECK-NEXT: %34 = load i128, ptr %ui128.addr, align 16 + // CHECK-NEXT: %35 = call i128 @llvm.cttz.i128(i128 %34, i1 true) + // CHECK-NEXT: %cast24 = trunc i128 %35 to i32 + // CHECK-NEXT: %iszero25 = icmp eq i128 %34, 0 + // CHECK-NEXT: %36 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %ctzg26 = select i1 %iszero25, i32 %36, i32 %cast24 + // CHECK-NEXT: store volatile i32 %ctzg26, ptr %tz, align 4 tz = __builtin_ctzg(ui128, i); - // CHECK-NEXT: %30 = load i128, ptr %ui128.addr, align 16 - // CHECK-NEXT: %31 = call i128 @llvm.cttz.i128(i128 %30, i1 true) - // CHECK-NEXT: %cast21 = trunc i128 %31 to i32 - // CHECK-NEXT: %iszero22 = icmp eq i128 %30, 0 - // CHECK-NEXT: %32 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %ctzg23 = select i1 %iszero22, i32 %32, i32 %cast21 - // CHECK-NEXT: store volatile i32 %ctzg23, ptr %tz, align 4 + // CHECK-NEXT: %37 = load i128, ptr %ubi128.addr, align 8 + // CHECK-NEXT: %38 = call i128 @llvm.cttz.i128(i128 %37, i1 true) + // CHECK-NEXT: %cast27 = trunc i128 %38 to i32 + // CHECK-NEXT: %iszero28 = icmp eq i128 %37, 0 + // CHECK-NEXT: %39 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %ctzg29 = select i1 %iszero28, i32 
%39, i32 %cast27 + // CHECK-NEXT: store volatile i32 %ctzg29, ptr %tz, align 4 tz = __builtin_ctzg(ubi128, i); - // CHECK-NEXT: %33 = load i128, ptr %ubi128.addr, align 8 - // CHECK-NEXT: %34 = call i128 @llvm.cttz.i128(i128 %33, i1 true) - // CHECK-NEXT: %cast24 = trunc i128 %34 to i32 - // CHECK-NEXT: %iszero25 = icmp eq i128 %33, 0 - // CHECK-NEXT: %35 = load i32, ptr %i.addr, align 4 - // CHECK-NEXT: %ctzg26 = select i1 %iszero25, i32 %35, i32 %cast24 - // CHECK-NEXT: store volatile i32 %ctzg26, ptr %tz, align 4 - // CHECK-NEXT: ret void + // CHECK-NEXT: %load_bits30 = load i8, ptr %vb8.addr, align 1 + // CHECK-NEXT: %40 = bitcast i8 %load_bits30 to <8 x i1> + // CHECK-NEXT: %41 = bitcast <8 x i1> %40 to i8 + // CHECK-NEXT: %42 = call i8 @llvm.cttz.i8(i8 %41, i1 true) + // CHECK-NEXT: %cast31 = zext i8 %42 to i32 + // CHECK-NEXT: %iszero32 = icmp eq i8 %41, 0 + // CHECK-NEXT: %43 = load i32, ptr %i.addr, align 4 + // CHECK-NEXT: %ctzg33 = select i1 %iszero32, i32 %43, i32 %cast31 + // CHECK-NEXT: store volatile i32 %ctzg33, ptr %tz, align 4 + tz = __builtin_ctzg(vb8, i); } #endif
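
Not part of the patch: a minimal usage sketch of the semantics described in the LanguageExtensions.rst hunk above, assuming a Clang build with this change; the bool8 typedef, the demo function, and the literal values are illustrative only.

typedef _Bool bool8 __attribute__((ext_vector_type(8)));

int demo(void) {
  bool8 v = (bool8){0, 1, 0, 0, 0, 0, 0, 0}; /* only element 1 is true, so bit 1 of the bit-field is set */
  int pop = __builtin_popcountg(v);          /* 1: one element is true */
  int tz  = __builtin_ctzg(v, -1);           /* 1: one zero element before the first true element */
  int lz  = __builtin_clzg(v, -1);           /* 6: six zero elements after the last true element */
  return pop + tz + lz;                      /* 8 */
}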