diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ed8bd25698c03..dace204b1251f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -6139,6 +6139,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_FCANONICALIZE: case TargetOpcode::G_SEXT_INREG: case TargetOpcode::G_ABS: + case TargetOpcode::G_CTLZ: if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 0da3c73b6926d..3270a8145399b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -326,12 +326,23 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .maxScalarEltSameAsIf(always, 1, 0); getActionDefinitionsBuilder(G_CTLZ) - .legalForCartesianProduct( - {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) - .scalarize(1) + .legalFor({{s32, s32}, + {s64, s64}, + {v8s8, v8s8}, + {v16s8, v16s8}, + {v4s16, v4s16}, + {v8s16, v8s16}, + {v2s32, v2s32}, + {v4s32, v4s32}}) .widenScalarToNextPow2(1, /*Min=*/32) .clampScalar(1, s32, s64) + .clampNumElements(0, v8s8, v16s8) + .clampNumElements(0, v4s16, v8s16) + .clampNumElements(0, v2s32, v4s32) + .moreElementsToNextPow2(0) + .scalarizeIf(scalarOrEltWiderThan(0, 32), 0) .scalarSameSizeAs(0, 1); + getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower(); getActionDefinitionsBuilder(G_CTTZ) diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll index 437e3d5ff75c6..59a845044e164 100644 --- a/llvm/test/CodeGen/AArch64/ctlz.ll +++ b/llvm/test/CodeGen/AArch64/ctlz.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define void @v2i8(ptr %p1) { @@ -21,14 +21,14 @@ define void @v2i8(ptr %p1) { ; ; CHECK-GI-LABEL: v2i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldrb w8, [x0] -; CHECK-GI-NEXT: ldrb w9, [x0, #1] -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: strb w8, [x0] -; CHECK-GI-NEXT: strb w9, [x0, #1] +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x0, #1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: clz v0.8b, v0.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i8>, ptr %p1 @@ -59,18 +59,18 @@ define void @v3i8(ptr %p1) { ; ; CHECK-GI-LABEL: v3i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldrb w8, [x0] -; CHECK-GI-NEXT: ldrb w9, [x0, #1] -; CHECK-GI-NEXT: ldrb w10, [x0, #2] -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w10 -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: strb w8, [x0] -; CHECK-GI-NEXT: sub w8, w10, #24 -; CHECK-GI-NEXT: strb w9, [x0, #1] -; CHECK-GI-NEXT: strb w8, [x0, #2] +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x0, #1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x0, #2 +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: ldr b1, [x0, #2] +; CHECK-GI-NEXT: mov v0.b[2], v1.b[0] +; CHECK-GI-NEXT: clz v0.8b, v0.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: st1 { v0.b }[2], [x9] ; CHECK-GI-NEXT: ret entry: %d = load <3 x i8>, ptr %p1 @@ -95,29 +95,15 @@ define void @v4i8(ptr %p1) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ldr w8, [x0] ; CHECK-GI-NEXT: fmov s0, w8 -; CHECK-GI-NEXT: uxtb w8, w8 -; CHECK-GI-NEXT: clz w8, w8 ; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: sub w8, w8, #24 +; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b3, v0.b[2] ; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: fmov w9, s1 -; CHECK-GI-NEXT: fmov w10, s2 -; CHECK-GI-NEXT: fmov s1, w8 -; CHECK-GI-NEXT: uxtb w9, w9 -; CHECK-GI-NEXT: uxtb w8, w10 -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: mov v1.b[1], w9 -; CHECK-GI-NEXT: fmov w9, s0 -; CHECK-GI-NEXT: uxtb w9, w9 -; CHECK-GI-NEXT: mov v1.b[2], w8 -; CHECK-GI-NEXT: clz w8, w9 -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: mov v1.b[3], w8 -; CHECK-GI-NEXT: fmov w8, s1 +; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] +; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] +; CHECK-GI-NEXT: clz v0.8b, v2.8b +; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret entry: @@ -148,145 +134,11 @@ entry: } define <32 x i8> @v32i8(<32 x i8> %d) { -; CHECK-SD-LABEL: v32i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: clz v0.16b, v0.16b -; CHECK-SD-NEXT: clz v1.16b, v1.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v32i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: umov w9, v0.b[0] -; CHECK-GI-NEXT: umov w11, v1.b[0] -; CHECK-GI-NEXT: umov w10, v0.b[1] -; CHECK-GI-NEXT: umov w13, v1.b[1] -; CHECK-GI-NEXT: umov w8, v0.b[2] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w11, w11 -; CHECK-GI-NEXT: clz w10, w10 -; CHECK-GI-NEXT: sub w14, w9, #24 -; CHECK-GI-NEXT: sub w12, w11, #24 -; CHECK-GI-NEXT: clz w11, w13 -; CHECK-GI-NEXT: fmov s2, w14 -; CHECK-GI-NEXT: fmov s3, w12 -; CHECK-GI-NEXT: umov w9, v1.b[2] -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: sub w11, w11, #24 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: mov v2.b[1], w10 -; CHECK-GI-NEXT: mov v3.b[1], w11 -; CHECK-GI-NEXT: umov w10, v0.b[3] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: umov w11, v1.b[3] -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: mov v2.b[2], w8 -; CHECK-GI-NEXT: mov v3.b[2], w9 -; CHECK-GI-NEXT: clz w8, w10 -; CHECK-GI-NEXT: umov w9, v0.b[4] -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[4] -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[3], w8 -; CHECK-GI-NEXT: mov v3.b[3], w10 -; CHECK-GI-NEXT: umov w8, v0.b[5] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[5] -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[4], w9 -; CHECK-GI-NEXT: mov v3.b[4], w10 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: umov w9, v0.b[6] -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[6] -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[5], w8 -; CHECK-GI-NEXT: mov v3.b[5], w10 -; CHECK-GI-NEXT: umov w8, v0.b[7] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[7] -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[6], w9 -; CHECK-GI-NEXT: mov v3.b[6], w10 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: umov w9, v0.b[8] -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[8] -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[7], w8 -; CHECK-GI-NEXT: mov v3.b[7], w10 -; CHECK-GI-NEXT: umov w8, v0.b[9] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[9] -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[8], w9 -; CHECK-GI-NEXT: mov v3.b[8], w10 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: umov w9, v0.b[10] -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[10] -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[9], w8 -; CHECK-GI-NEXT: mov v3.b[9], w10 -; CHECK-GI-NEXT: umov w8, v0.b[11] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[11] -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[10], w9 -; CHECK-GI-NEXT: mov v3.b[10], w10 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: umov w9, v0.b[12] -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[12] -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[11], w8 -; CHECK-GI-NEXT: mov v3.b[11], w10 -; CHECK-GI-NEXT: umov w8, v0.b[13] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[13] -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[12], w9 -; CHECK-GI-NEXT: mov v3.b[12], w10 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: umov w9, v0.b[14] -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.b[14] -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w10, w10, #24 -; CHECK-GI-NEXT: mov v2.b[13], w8 -; CHECK-GI-NEXT: mov v3.b[13], w10 -; CHECK-GI-NEXT: umov w8, v0.b[15] -; CHECK-GI-NEXT: umov w10, v1.b[15] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w11, w11 -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: sub w11, w11, #24 -; CHECK-GI-NEXT: mov v2.b[14], w9 -; CHECK-GI-NEXT: mov v3.b[14], w11 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w9, w10 -; CHECK-GI-NEXT: sub w8, w8, #24 -; CHECK-GI-NEXT: sub w9, w9, #24 -; CHECK-GI-NEXT: mov v2.b[15], w8 -; CHECK-GI-NEXT: mov v3.b[15], w9 -; CHECK-GI-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NEXT: mov v1.16b, v3.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v32i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: clz v0.16b, v0.16b +; CHECK-NEXT: clz v1.16b, v1.16b +; CHECK-NEXT: ret entry: %s = call <32 x i8> @llvm.ctlz(<32 x i8> %d, i1 false) ret <32 x i8> %s @@ -310,14 +162,12 @@ define void @v2i16(ptr %p1) { ; ; CHECK-GI-LABEL: v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldrh w8, [x0] -; CHECK-GI-NEXT: ldrh w9, [x0, #2] -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: sub w8, w8, #16 -; CHECK-GI-NEXT: sub w9, w9, #16 -; CHECK-GI-NEXT: strh w8, [x0] -; CHECK-GI-NEXT: strh w9, [x0, #2] +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: clz v0.4h, v0.4h +; CHECK-GI-NEXT: str h0, [x0] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i16>, ptr %p1 @@ -338,18 +188,15 @@ define void @v3i16(ptr %p1) { ; ; CHECK-GI-LABEL: v3i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldrh w8, [x0] -; CHECK-GI-NEXT: ldrh w9, [x0, #2] -; CHECK-GI-NEXT: ldrh w10, [x0, #4] -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w10 -; CHECK-GI-NEXT: sub w8, w8, #16 -; CHECK-GI-NEXT: sub w9, w9, #16 -; CHECK-GI-NEXT: strh w8, [x0] -; CHECK-GI-NEXT: sub w8, w10, #16 -; CHECK-GI-NEXT: strh w9, [x0, #2] -; CHECK-GI-NEXT: strh w8, [x0, #4] +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: add x9, x0, #4 +; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-GI-NEXT: clz v0.4h, v0.4h +; CHECK-GI-NEXT: str h0, [x0] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: st1 { v0.h }[2], [x9] ; CHECK-GI-NEXT: ret entry: %d = load <3 x i16>, ptr %p1 @@ -379,81 +226,11 @@ entry: } define <16 x i16> @v16i16(<16 x i16> %d) { -; CHECK-SD-LABEL: v16i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: clz v0.8h, v0.8h -; CHECK-SD-NEXT: clz v1.8h, v1.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v16i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: umov w8, v0.h[0] -; CHECK-GI-NEXT: umov w10, v1.h[0] -; CHECK-GI-NEXT: umov w9, v0.h[1] -; CHECK-GI-NEXT: umov w11, v1.h[1] -; CHECK-GI-NEXT: umov w12, v0.h[2] -; CHECK-GI-NEXT: umov w13, v1.h[2] -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w10, w10 -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: sub w8, w8, #16 -; CHECK-GI-NEXT: sub w10, w10, #16 -; CHECK-GI-NEXT: clz w11, w11 -; CHECK-GI-NEXT: fmov s2, w8 -; CHECK-GI-NEXT: fmov s3, w10 -; CHECK-GI-NEXT: sub w9, w9, #16 -; CHECK-GI-NEXT: sub w11, w11, #16 -; CHECK-GI-NEXT: umov w8, v0.h[3] -; CHECK-GI-NEXT: clz w10, w13 -; CHECK-GI-NEXT: sub w10, w10, #16 -; CHECK-GI-NEXT: mov v2.h[1], w9 -; CHECK-GI-NEXT: mov v3.h[1], w11 -; CHECK-GI-NEXT: clz w9, w12 -; CHECK-GI-NEXT: umov w11, v1.h[3] -; CHECK-GI-NEXT: sub w9, w9, #16 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: sub w8, w8, #16 -; CHECK-GI-NEXT: mov v2.h[2], w9 -; CHECK-GI-NEXT: mov v3.h[2], w10 -; CHECK-GI-NEXT: umov w9, v0.h[4] -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.h[4] -; CHECK-GI-NEXT: sub w10, w10, #16 -; CHECK-GI-NEXT: mov v2.h[3], w8 -; CHECK-GI-NEXT: mov v3.h[3], w10 -; CHECK-GI-NEXT: umov w8, v0.h[5] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.h[5] -; CHECK-GI-NEXT: sub w9, w9, #16 -; CHECK-GI-NEXT: sub w10, w10, #16 -; CHECK-GI-NEXT: mov v2.h[4], w9 -; CHECK-GI-NEXT: mov v3.h[4], w10 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: umov w9, v0.h[6] -; CHECK-GI-NEXT: clz w10, w11 -; CHECK-GI-NEXT: umov w11, v1.h[6] -; CHECK-GI-NEXT: sub w8, w8, #16 -; CHECK-GI-NEXT: sub w10, w10, #16 -; CHECK-GI-NEXT: mov v2.h[5], w8 -; CHECK-GI-NEXT: mov v3.h[5], w10 -; CHECK-GI-NEXT: umov w8, v0.h[7] -; CHECK-GI-NEXT: umov w10, v1.h[7] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w11, w11 -; CHECK-GI-NEXT: sub w9, w9, #16 -; CHECK-GI-NEXT: sub w11, w11, #16 -; CHECK-GI-NEXT: mov v2.h[6], w9 -; CHECK-GI-NEXT: mov v3.h[6], w11 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w9, w10 -; CHECK-GI-NEXT: sub w8, w8, #16 -; CHECK-GI-NEXT: sub w9, w9, #16 -; CHECK-GI-NEXT: mov v2.h[7], w8 -; CHECK-GI-NEXT: mov v3.h[7], w9 -; CHECK-GI-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NEXT: mov v1.16b, v3.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v16i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: clz v0.8h, v0.8h +; CHECK-NEXT: clz v1.8h, v1.8h +; CHECK-NEXT: ret entry: %s = call <16 x i16> @llvm.ctlz(<16 x i16> %d, i1 false) ret <16 x i16> %s @@ -470,24 +247,10 @@ entry: } define <3 x i32> @v3i32(<3 x i32> %d) { -; CHECK-SD-LABEL: v3i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: clz v0.4s, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v3i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov w9, v0.s[1] -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: mov v1.s[0], w8 -; CHECK-GI-NEXT: mov w8, v0.s[2] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: mov v1.s[1], w9 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: mov v1.s[2], w8 -; CHECK-GI-NEXT: mov v0.16b, v1.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: clz v0.4s, v0.4s +; CHECK-NEXT: ret entry: %s = call <3 x i32> @llvm.ctlz(<3 x i32> %d, i1 false) ret <3 x i32> %s @@ -504,41 +267,11 @@ entry: } define <8 x i32> @v8i32(<8 x i32> %d) { -; CHECK-SD-LABEL: v8i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: clz v0.4s, v0.4s -; CHECK-SD-NEXT: clz v1.4s, v1.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v8i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: fmov w9, s0 -; CHECK-GI-NEXT: fmov w10, s1 -; CHECK-GI-NEXT: mov w8, v0.s[1] -; CHECK-GI-NEXT: mov w11, v1.s[1] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w10 -; CHECK-GI-NEXT: mov v2.s[0], w9 -; CHECK-GI-NEXT: mov v3.s[0], w10 -; CHECK-GI-NEXT: mov w9, v0.s[2] -; CHECK-GI-NEXT: mov w10, v1.s[2] -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w11, w11 -; CHECK-GI-NEXT: mov v2.s[1], w8 -; CHECK-GI-NEXT: mov v3.s[1], w11 -; CHECK-GI-NEXT: mov w8, v0.s[3] -; CHECK-GI-NEXT: mov w11, v1.s[3] -; CHECK-GI-NEXT: clz w9, w9 -; CHECK-GI-NEXT: clz w10, w10 -; CHECK-GI-NEXT: mov v2.s[2], w9 -; CHECK-GI-NEXT: mov v3.s[2], w10 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: clz w9, w11 -; CHECK-GI-NEXT: mov v2.s[3], w8 -; CHECK-GI-NEXT: mov v3.s[3], w9 -; CHECK-GI-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NEXT: mov v1.16b, v3.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v8i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: clz v0.4s, v0.4s +; CHECK-NEXT: clz v1.4s, v1.4s +; CHECK-NEXT: ret entry: %s = call <8 x i32> @llvm.ctlz(<8 x i32> %d, i1 false) ret <8 x i32> %s