diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aa9032ea2574c..cd0440077f526 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22533,6 +22533,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT && Index == VecOp.getOperand(2)) { SDValue Elt = VecOp.getOperand(1); + AddUsersToWorklist(VecOp.getNode()); return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt; } diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll index 00cc6b21ccaf8..abf2e1272d645 100644 --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -951,10 +951,8 @@ define <1 x i128> @sext_v1x64(<1 x i64> %arg) { ; CHECK-SD-LABEL: sext_v1x64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: fmov x8, d0 -; CHECK-SD-NEXT: asr x1, x8, #63 -; CHECK-SD-NEXT: mov.d v0[1], x1 ; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: asr x1, x0, #63 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sext_v1x64: diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index 178c229d04e47..62a79e3547b29 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -1802,28 +1802,25 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: mov.d x8, v0[1] ; CHECK-NEXT: mov.d x9, v1[1] ; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: asr x12, x10, #63 -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: subs x10, x10, x11 +; CHECK-NEXT: fmov x12, d1 +; CHECK-NEXT: asr x14, x10, #63 ; CHECK-NEXT: asr x11, x8, #63 -; CHECK-NEXT: asr x14, x9, #63 -; CHECK-NEXT: sbc x12, x12, x13 +; CHECK-NEXT: asr x13, x9, #63 +; CHECK-NEXT: asr x15, x12, #63 ; CHECK-NEXT: subs x8, x8, x9 -; CHECK-NEXT: sbc x9, x11, x14 -; CHECK-NEXT: asr x13, x12, #63 -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: eor x10, x10, x13 -; CHECK-NEXT: eor x8, x8, x11 -; CHECK-NEXT: eor x9, x9, x11 -; CHECK-NEXT: subs x2, x8, x11 -; CHECK-NEXT: eor x8, x12, x13 -; CHECK-NEXT: sbc x3, x9, x11 -; CHECK-NEXT: subs x9, x10, x13 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: sbc x1, x8, x13 -; CHECK-NEXT: mov.d v0[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: sbc x9, x11, x13 +; CHECK-NEXT: subs x10, x10, x12 +; CHECK-NEXT: sbc x11, x14, x15 +; CHECK-NEXT: asr x13, x9, #63 +; CHECK-NEXT: asr x12, x11, #63 +; CHECK-NEXT: eor x8, x8, x13 +; CHECK-NEXT: eor x9, x9, x13 +; CHECK-NEXT: eor x10, x10, x12 +; CHECK-NEXT: eor x11, x11, x12 +; CHECK-NEXT: subs x0, x10, x12 +; CHECK-NEXT: sbc x1, x11, x12 +; CHECK-NEXT: subs x2, x8, x13 +; CHECK-NEXT: sbc x3, x9, x13 ; CHECK-NEXT: ret %aext = sext <2 x i64> %a to <2 x i128> %bext = sext <2 x i64> %b to <2 x i128> diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll index 22440b79bdcd4..b4f179e992a0d 100644 --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -241,21 +241,18 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) { define <4 x i65> @sign_4xi65(<4 x i65> %a) { ; CHECK-LABEL: sign_4xi65: ; CHECK: // %bb.0: -; CHECK-NEXT: sbfx x8, x1, #0, #1 -; CHECK-NEXT: sbfx x9, x5, #0, #1 -; CHECK-NEXT: sbfx x10, x3, #0, #1 -; CHECK-NEXT: lsr x1, x8, #63 -; CHECK-NEXT: orr x8, x8, #0x1 -; CHECK-NEXT: lsr x3, x10, #63 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: sbfx x8, x7, #0, #1 -; CHECK-NEXT: lsr x5, x9, #63 -; CHECK-NEXT: orr x2, x10, #0x1 -; CHECK-NEXT: orr x4, x9, #0x1 -; CHECK-NEXT: lsr x7, x8, #63 -; CHECK-NEXT: orr x6, x8, #0x1 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: sbfx x8, x5, #0, #1 +; CHECK-NEXT: sbfx x9, x3, #0, #1 +; CHECK-NEXT: sbfx x10, x1, #0, #1 +; CHECK-NEXT: sbfx x11, x7, #0, #1 +; CHECK-NEXT: lsr x1, x10, #63 +; CHECK-NEXT: lsr x3, x9, #63 +; CHECK-NEXT: lsr x5, x8, #63 +; CHECK-NEXT: lsr x7, x11, #63 +; CHECK-NEXT: orr x0, x10, #0x1 +; CHECK-NEXT: orr x2, x9, #0x1 +; CHECK-NEXT: orr x4, x8, #0x1 +; CHECK-NEXT: orr x6, x11, #0x1 ; CHECK-NEXT: ret %c = icmp sgt <4 x i65> %a, %res = select <4 x i1> %c, <4 x i65> , <4 x i65 > diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index 4723ac01d6021..0c880592d955b 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -2287,20 +2287,19 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) { ; CHECK-SD-NEXT: .cfi_offset w20, -16 ; CHECK-SD-NEXT: .cfi_offset w30, -32 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov d0, v0.d[1] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __fixdfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl __fixdfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x2, x0 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -2345,20 +2344,19 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) { ; CHECK-SD-NEXT: .cfi_offset w20, -16 ; CHECK-SD-NEXT: .cfi_offset w30, -32 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov d0, v0.d[1] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: bl __fixunsdfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: mov d0, v0.d[1] ; CHECK-SD-NEXT: bl __fixunsdfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x2, x0 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -2407,28 +2405,26 @@ define <3 x i128> @fptos_v3f64_v3i128(<3 x double> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -48 ; CHECK-SD-NEXT: .cfi_offset b8, -56 ; CHECK-SD-NEXT: .cfi_offset b9, -64 -; CHECK-SD-NEXT: fmov d9, d0 -; CHECK-SD-NEXT: fmov d0, d1 ; CHECK-SD-NEXT: fmov d8, d2 +; CHECK-SD-NEXT: fmov d9, d1 ; CHECK-SD-NEXT: bl __fixdfti -; CHECK-SD-NEXT: fmov d0, d8 +; CHECK-SD-NEXT: fmov d0, d9 ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 ; CHECK-SD-NEXT: bl __fixdfti -; CHECK-SD-NEXT: fmov d0, d9 +; CHECK-SD-NEXT: fmov d0, d8 ; CHECK-SD-NEXT: mov x21, x0 ; CHECK-SD-NEXT: mov x22, x1 ; CHECK-SD-NEXT: bl __fixdfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 -; CHECK-SD-NEXT: mov x4, x21 -; CHECK-SD-NEXT: mov x5, x22 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov x4, x0 +; CHECK-SD-NEXT: mov x5, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 +; CHECK-SD-NEXT: mov x2, x21 +; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret ; @@ -2488,28 +2484,26 @@ define <3 x i128> @fptou_v3f64_v3i128(<3 x double> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -48 ; CHECK-SD-NEXT: .cfi_offset b8, -56 ; CHECK-SD-NEXT: .cfi_offset b9, -64 -; CHECK-SD-NEXT: fmov d9, d0 -; CHECK-SD-NEXT: fmov d0, d1 ; CHECK-SD-NEXT: fmov d8, d2 +; CHECK-SD-NEXT: fmov d9, d1 ; CHECK-SD-NEXT: bl __fixunsdfti -; CHECK-SD-NEXT: fmov d0, d8 +; CHECK-SD-NEXT: fmov d0, d9 ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 ; CHECK-SD-NEXT: bl __fixunsdfti -; CHECK-SD-NEXT: fmov d0, d9 +; CHECK-SD-NEXT: fmov d0, d8 ; CHECK-SD-NEXT: mov x21, x0 ; CHECK-SD-NEXT: mov x22, x1 ; CHECK-SD-NEXT: bl __fixunsdfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 -; CHECK-SD-NEXT: mov x4, x21 -; CHECK-SD-NEXT: mov x5, x22 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov x4, x0 +; CHECK-SD-NEXT: mov x5, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 +; CHECK-SD-NEXT: mov x2, x21 +; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret ; @@ -3694,20 +3688,19 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -32 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov s0, v0.s[1] +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x2, x0 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -3754,20 +3747,19 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -32 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov s0, v0.s[1] +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x2, x0 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -3822,23 +3814,22 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) { ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: mov s0, v0.s[1] +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixsfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x21, x0 ; CHECK-SD-NEXT: mov x22, x1 -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x21 -; CHECK-SD-NEXT: mov x3, x22 +; CHECK-SD-NEXT: mov x2, x0 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x0, x21 +; CHECK-SD-NEXT: mov x1, x22 ; CHECK-SD-NEXT: mov x4, x19 ; CHECK-SD-NEXT: mov x5, x20 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; @@ -3904,23 +3895,22 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) { ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: mov s0, v0.s[1] +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-SD-NEXT: bl __fixunssfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x21, x0 ; CHECK-SD-NEXT: mov x22, x1 -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: mov s0, v0.s[1] ; CHECK-SD-NEXT: bl __fixunssfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x21 -; CHECK-SD-NEXT: mov x3, x22 +; CHECK-SD-NEXT: mov x2, x0 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x0, x21 +; CHECK-SD-NEXT: mov x1, x22 ; CHECK-SD-NEXT: mov x4, x19 ; CHECK-SD-NEXT: mov x5, x20 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; @@ -7034,20 +7024,19 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -32 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-SD-NEXT: bl __fixhfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-SD-NEXT: mov h0, v0.h[1] ; CHECK-SD-NEXT: bl __fixhfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x2, x0 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -7089,20 +7078,19 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -32 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-SD-NEXT: bl __fixunshfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-SD-NEXT: mov h0, v0.h[1] ; CHECK-SD-NEXT: bl __fixunshfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x2, x0 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #48 ; CHECK-SD-NEXT: ret ; @@ -7147,28 +7135,27 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -48 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-SD-NEXT: bl __fixhfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: mov h0, v0.h[2] +; CHECK-SD-NEXT: mov h0, v0.h[1] ; CHECK-SD-NEXT: bl __fixhfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x21, x0 ; CHECK-SD-NEXT: mov x22, x1 -; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: bl __fixhfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 -; CHECK-SD-NEXT: mov x4, x21 -; CHECK-SD-NEXT: mov x5, x22 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov x4, x0 +; CHECK-SD-NEXT: mov x5, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 +; CHECK-SD-NEXT: mov x2, x21 +; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; @@ -7220,28 +7207,27 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) { ; CHECK-SD-NEXT: .cfi_offset w30, -48 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-SD-NEXT: bl __fixunshfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x19, x0 ; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: mov h0, v0.h[2] +; CHECK-SD-NEXT: mov h0, v0.h[1] ; CHECK-SD-NEXT: bl __fixunshfti ; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-SD-NEXT: mov x21, x0 ; CHECK-SD-NEXT: mov x22, x1 -; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-SD-NEXT: mov h0, v0.h[2] ; CHECK-SD-NEXT: bl __fixunshfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 -; CHECK-SD-NEXT: mov x4, x21 -; CHECK-SD-NEXT: mov x5, x22 -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov x4, x0 +; CHECK-SD-NEXT: mov x5, x1 +; CHECK-SD-NEXT: mov x0, x19 +; CHECK-SD-NEXT: mov x1, x20 +; CHECK-SD-NEXT: mov x2, x21 +; CHECK-SD-NEXT: mov x3, x22 ; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret ; @@ -8083,260 +8069,136 @@ entry: } define <2 x i128> @fptos_v2f128_v2i128(<2 x fp128> %a) { -; CHECK-SD-LABEL: fptos_v2f128_v2i128: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 -; CHECK-SD-NEXT: .cfi_offset w19, -8 -; CHECK-SD-NEXT: .cfi_offset w20, -16 -; CHECK-SD-NEXT: .cfi_offset w30, -32 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: bl __fixtfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov x19, x0 -; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: bl __fixtfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 -; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: add sp, sp, #48 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptos_v2f128_v2i128: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 -; CHECK-GI-NEXT: .cfi_offset w19, -8 -; CHECK-GI-NEXT: .cfi_offset w20, -16 -; CHECK-GI-NEXT: .cfi_offset w30, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __fixtfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov x19, x0 -; CHECK-GI-NEXT: mov x20, x1 -; CHECK-GI-NEXT: bl __fixtfti -; CHECK-GI-NEXT: mov x2, x0 -; CHECK-GI-NEXT: mov x3, x1 -; CHECK-GI-NEXT: mov x0, x19 -; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-GI-NEXT: add sp, sp, #48 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptos_v2f128_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x3, x1 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret entry: %c = fptosi <2 x fp128> %a to <2 x i128> ret <2 x i128> %c } define <2 x i128> @fptou_v2f128_v2i128(<2 x fp128> %a) { -; CHECK-SD-LABEL: fptou_v2f128_v2i128: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sub sp, sp, #48 -; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 -; CHECK-SD-NEXT: .cfi_offset w19, -8 -; CHECK-SD-NEXT: .cfi_offset w20, -16 -; CHECK-SD-NEXT: .cfi_offset w30, -32 -; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: bl __fixunstfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov x19, x0 -; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: bl __fixunstfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 -; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: add sp, sp, #48 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptou_v2f128_v2i128: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub sp, sp, #48 -; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 -; CHECK-GI-NEXT: .cfi_offset w19, -8 -; CHECK-GI-NEXT: .cfi_offset w20, -16 -; CHECK-GI-NEXT: .cfi_offset w30, -32 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: bl __fixunstfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov x19, x0 -; CHECK-GI-NEXT: mov x20, x1 -; CHECK-GI-NEXT: bl __fixunstfti -; CHECK-GI-NEXT: mov x2, x0 -; CHECK-GI-NEXT: mov x3, x1 -; CHECK-GI-NEXT: mov x0, x19 -; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-GI-NEXT: add sp, sp, #48 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptou_v2f128_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x3, x1 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret entry: %c = fptoui <2 x fp128> %a to <2 x i128> ret <2 x i128> %c } define <3 x i128> @fptos_v3f128_v3i128(<3 x fp128> %a) { -; CHECK-SD-LABEL: fptos_v3f128_v3i128: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 -; CHECK-SD-NEXT: .cfi_offset w19, -8 -; CHECK-SD-NEXT: .cfi_offset w20, -16 -; CHECK-SD-NEXT: .cfi_offset w21, -24 -; CHECK-SD-NEXT: .cfi_offset w22, -32 -; CHECK-SD-NEXT: .cfi_offset w30, -48 -; CHECK-SD-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill -; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: bl __fixtfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov x19, x0 -; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: bl __fixtfti -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov x21, x0 -; CHECK-SD-NEXT: mov x22, x1 -; CHECK-SD-NEXT: bl __fixtfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 -; CHECK-SD-NEXT: mov x4, x21 -; CHECK-SD-NEXT: mov x5, x22 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: add sp, sp, #80 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptos_v3f128_v3i128: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 -; CHECK-GI-NEXT: .cfi_offset w19, -8 -; CHECK-GI-NEXT: .cfi_offset w20, -16 -; CHECK-GI-NEXT: .cfi_offset w21, -24 -; CHECK-GI-NEXT: .cfi_offset w22, -32 -; CHECK-GI-NEXT: .cfi_offset w30, -48 -; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill -; CHECK-GI-NEXT: bl __fixtfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov x19, x0 -; CHECK-GI-NEXT: mov x20, x1 -; CHECK-GI-NEXT: bl __fixtfti -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov x21, x0 -; CHECK-GI-NEXT: mov x22, x1 -; CHECK-GI-NEXT: bl __fixtfti -; CHECK-GI-NEXT: mov x4, x0 -; CHECK-GI-NEXT: mov x5, x1 -; CHECK-GI-NEXT: mov x0, x19 -; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: mov x2, x21 -; CHECK-GI-NEXT: mov x3, x22 -; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: add sp, sp, #80 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptos_v3f128_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: mov x4, x0 +; CHECK-NEXT: mov x5, x1 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret entry: %c = fptosi <3 x fp128> %a to <3 x i128> ret <3 x i128> %c } define <3 x i128> @fptou_v3f128_v3i128(<3 x fp128> %a) { -; CHECK-SD-LABEL: fptou_v3f128_v3i128: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: sub sp, sp, #80 -; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 -; CHECK-SD-NEXT: .cfi_offset w19, -8 -; CHECK-SD-NEXT: .cfi_offset w20, -16 -; CHECK-SD-NEXT: .cfi_offset w21, -24 -; CHECK-SD-NEXT: .cfi_offset w22, -32 -; CHECK-SD-NEXT: .cfi_offset w30, -48 -; CHECK-SD-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill -; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: bl __fixunstfti -; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov x19, x0 -; CHECK-SD-NEXT: mov x20, x1 -; CHECK-SD-NEXT: bl __fixunstfti -; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov x21, x0 -; CHECK-SD-NEXT: mov x22, x1 -; CHECK-SD-NEXT: bl __fixunstfti -; CHECK-SD-NEXT: fmov d0, x0 -; CHECK-SD-NEXT: mov x2, x19 -; CHECK-SD-NEXT: mov x3, x20 -; CHECK-SD-NEXT: mov x4, x21 -; CHECK-SD-NEXT: mov x5, x22 -; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov v0.d[1], x1 -; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: add sp, sp, #80 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptou_v3f128_v3i128: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub sp, sp, #80 -; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 -; CHECK-GI-NEXT: .cfi_offset w19, -8 -; CHECK-GI-NEXT: .cfi_offset w20, -16 -; CHECK-GI-NEXT: .cfi_offset w21, -24 -; CHECK-GI-NEXT: .cfi_offset w22, -32 -; CHECK-GI-NEXT: .cfi_offset w30, -48 -; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill -; CHECK-GI-NEXT: bl __fixunstfti -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov x19, x0 -; CHECK-GI-NEXT: mov x20, x1 -; CHECK-GI-NEXT: bl __fixunstfti -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov x21, x0 -; CHECK-GI-NEXT: mov x22, x1 -; CHECK-GI-NEXT: bl __fixunstfti -; CHECK-GI-NEXT: mov x4, x0 -; CHECK-GI-NEXT: mov x5, x1 -; CHECK-GI-NEXT: mov x0, x19 -; CHECK-GI-NEXT: mov x1, x20 -; CHECK-GI-NEXT: mov x2, x21 -; CHECK-GI-NEXT: mov x3, x22 -; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-GI-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: add sp, sp, #80 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptou_v3f128_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: mov x4, x0 +; CHECK-NEXT: mov x5, x1 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret entry: %c = fptoui <3 x fp128> %a to <3 x i128> ret <3 x i128> %c diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index d620a8851ee44..91c8b7f345e32 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -819,47 +819,43 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) { ; CHECK-NEXT: .cfi_offset b9, -56 ; CHECK-NEXT: .cfi_offset b10, -64 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #241, lsl #24 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff -; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000 ; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000 ; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov s8, v0.s[1] +; CHECK-NEXT: fcmp s0, s9 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s0, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, xzr, x8, vs ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: fcmp s0, s9 -; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, x21, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s0, s10 +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt -; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: csel x2, xzr, x8, vs +; CHECK-NEXT: csel x3, xzr, x9, vs ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptosi.sat.v2f32.v2i100(<2 x float> %f) @@ -885,47 +881,43 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) { ; CHECK-NEXT: .cfi_offset b9, -56 ; CHECK-NEXT: .cfi_offset b10, -64 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #255, lsl #24 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff -; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov s8, v0.s[1] +; CHECK-NEXT: fcmp s0, s9 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s0, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, xzr, x8, vs ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: fcmp s0, s9 -; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, x21, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s0, s10 +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt -; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: csel x2, xzr, x8, vs +; CHECK-NEXT: csel x3, xzr, x9, vs ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float> %f) @@ -1068,15 +1060,15 @@ define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) { define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i100: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #128 -; CHECK-NEXT: str d10, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #40] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #56] // 8-byte Folded Spill -; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: sub sp, sp, #112 +; CHECK-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 112 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -1089,28 +1081,40 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: .cfi_offset b8, -80 ; CHECK-NEXT: .cfi_offset b9, -88 ; CHECK-NEXT: .cfi_offset b10, -96 -; CHECK-NEXT: mov s8, v0.s[1] -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #241, lsl #24 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff -; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000 ; CHECK-NEXT: fmov s10, w8 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000 ; CHECK-NEXT: mov x26, #34359738367 // =0x7ffffffff -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov s8, v0.s[1] +; CHECK-NEXT: fcmp s0, s9 +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x25, x1, lt +; CHECK-NEXT: fcmp s0, s10 +; CHECK-NEXT: csel x9, x26, x9, gt +; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: csel x19, xzr, x8, vs +; CHECK-NEXT: csel x20, xzr, x9, vs +; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: csel x19, xzr, x8, vs -; CHECK-NEXT: csel x20, xzr, x9, vs +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: csel x21, xzr, x8, vs +; CHECK-NEXT: csel x22, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s0, s9 @@ -1122,48 +1126,32 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s0, s0 ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: csel x21, xzr, x8, vs -; CHECK-NEXT: csel x22, xzr, x9, vs +; CHECK-NEXT: csel x23, xzr, x8, vs +; CHECK-NEXT: csel x24, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: mov x4, x23 +; CHECK-NEXT: mov x5, x24 +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: csel x23, xzr, x8, vs -; CHECK-NEXT: csel x24, xzr, x9, vs -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: mov x6, x23 -; CHECK-NEXT: fcmp s0, s9 -; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s0, s10 -; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x26, x8, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: csel x6, xzr, x8, vs +; CHECK-NEXT: csel x7, xzr, x9, vs +; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float> %f) ret <4 x i100> %x @@ -1172,15 +1160,15 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i128: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #128 -; CHECK-NEXT: str d10, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #40] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #56] // 8-byte Folded Spill -; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: sub sp, sp, #112 +; CHECK-NEXT: str d10, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 112 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -1193,28 +1181,40 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: .cfi_offset b8, -80 ; CHECK-NEXT: .cfi_offset b9, -88 ; CHECK-NEXT: .cfi_offset b10, -96 -; CHECK-NEXT: mov s8, v0.s[1] -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #255, lsl #24 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff -; CHECK-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: fmov s10, w8 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov s8, v0.s[1] +; CHECK-NEXT: fcmp s0, s9 +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x25, x1, lt +; CHECK-NEXT: fcmp s0, s10 +; CHECK-NEXT: csel x9, x26, x9, gt +; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: csel x19, xzr, x8, vs +; CHECK-NEXT: csel x20, xzr, x9, vs +; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: csel x19, xzr, x8, vs -; CHECK-NEXT: csel x20, xzr, x9, vs +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: csel x21, xzr, x8, vs +; CHECK-NEXT: csel x22, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s0, s9 @@ -1226,48 +1226,32 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s0, s0 ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: csel x21, xzr, x8, vs -; CHECK-NEXT: csel x22, xzr, x9, vs +; CHECK-NEXT: csel x23, xzr, x8, vs +; CHECK-NEXT: csel x24, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: mov x4, x23 +; CHECK-NEXT: mov x5, x24 +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: csel x23, xzr, x8, vs -; CHECK-NEXT: csel x24, xzr, x9, vs -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: mov x6, x23 -; CHECK-NEXT: fcmp s0, s9 -; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s0, s10 -; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x26, x8, gt -; CHECK-NEXT: fcmp s0, s0 -; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: csel x6, xzr, x8, vs +; CHECK-NEXT: csel x7, xzr, x9, vs +; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f) ret <4 x i128> %x @@ -1465,48 +1449,44 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: .cfi_offset b8, -48 ; CHECK-NEXT: .cfi_offset b9, -56 ; CHECK-NEXT: .cfi_offset b10, -64 -; CHECK-NEXT: mov d8, v0.d[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov d0, d8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000 -; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff ; CHECK-NEXT: fmov d9, x8 ; CHECK-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff ; CHECK-NEXT: fmov d10, x8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: mov d8, v0.d[1] +; CHECK-NEXT: fcmp d0, d9 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp d8, d10 +; CHECK-NEXT: fcmp d0, d10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp d8, d8 +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: csel x19, xzr, x8, vs ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: fcmp d8, d9 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: fcmp d0, d9 -; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, x21, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp d0, d10 +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: fcmp d8, d10 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt -; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: fcmp d8, d8 ; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: csel x2, xzr, x8, vs +; CHECK-NEXT: csel x3, xzr, x9, vs ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f) @@ -1531,48 +1511,44 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-NEXT: .cfi_offset b8, -48 ; CHECK-NEXT: .cfi_offset b9, -56 ; CHECK-NEXT: .cfi_offset b10, -64 -; CHECK-NEXT: mov d8, v0.d[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov d0, d8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff ; CHECK-NEXT: fmov d9, x8 ; CHECK-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff ; CHECK-NEXT: fmov d10, x8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: mov d8, v0.d[1] +; CHECK-NEXT: fcmp d0, d9 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp d8, d10 +; CHECK-NEXT: fcmp d0, d10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp d8, d8 +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: csel x19, xzr, x8, vs ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: fcmp d8, d9 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: fcmp d0, d9 -; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, x21, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp d0, d10 +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: fcmp d8, d10 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt -; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: fcmp d8, d8 ; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: csel x2, xzr, x8, vs +; CHECK-NEXT: csel x3, xzr, x9, vs ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f) @@ -1838,9 +1814,8 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: .cfi_offset b9, -88 ; CHECK-NEXT: .cfi_offset b10, -96 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #241, lsl #24 @@ -1849,7 +1824,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000 ; CHECK-NEXT: mov x26, #34359738367 // =0x7ffffffff -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt @@ -1864,7 +1839,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 @@ -1878,6 +1853,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 @@ -1890,30 +1866,27 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: mov x6, x23 -; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: mov x4, x23 +; CHECK-NEXT: mov x5, x24 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x25, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x26, x8, gt +; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: csel x9, x26, x9, gt +; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 +; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: csel x6, xzr, x8, vs +; CHECK-NEXT: csel x7, xzr, x9, vs ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptosi.sat.v4f16.v4i100(<4 x half> %f) @@ -1945,9 +1918,8 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: .cfi_offset b9, -88 ; CHECK-NEXT: .cfi_offset b10, -96 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #255, lsl #24 @@ -1956,7 +1928,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt @@ -1971,7 +1943,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 @@ -1985,6 +1957,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 @@ -1997,30 +1970,27 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: mov x6, x23 -; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: mov x4, x23 +; CHECK-NEXT: mov x5, x24 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x25, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x26, x8, gt +; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: csel x9, x26, x9, gt +; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 +; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: csel x6, xzr, x8, vs +; CHECK-NEXT: csel x7, xzr, x9, vs ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half> %f) diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index b03d145d1408d..6089d76f7820c 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -729,37 +729,33 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) { ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff -; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov x21, #68719476735 // =0xfffffffff ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: mov s8, v0.s[1] +; CHECK-NEXT: fcmp s0, #0.0 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: fcmp s0, s9 +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, x21, x9, gt ; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x20 -; CHECK-NEXT: mov x3, x19 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp s0, s9 -; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x1, x21, x9, gt -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: mov x1, x19 +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: csel x3, x21, x9, gt ; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: csinv x2, x8, xzr, le ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f) @@ -780,36 +776,32 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) { ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff -; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: mov s8, v0.s[1] +; CHECK-NEXT: fcmp s0, #0.0 ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: fcmp s0, s9 +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csinv x19, x9, xzr, le ; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp s0, s9 +; CHECK-NEXT: csel x8, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csinv x1, x9, xzr, le -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: csinv x2, x9, xzr, le +; CHECK-NEXT: csinv x3, x8, xzr, le ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f) @@ -935,13 +927,13 @@ define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) { define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i100: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x25, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x25, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -952,23 +944,32 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: .cfi_offset w30, -64 ; CHECK-NEXT: .cfi_offset b8, -72 ; CHECK-NEXT: .cfi_offset b9, -80 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff +; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff +; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: mov s8, v0.s[1] -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: fcmp s0, s9 ; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: csel x19, x25, x9, gt +; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: csel x21, x25, x9, gt +; CHECK-NEXT: csinv x22, x8, xzr, le ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: csel x19, x25, x9, gt -; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov s8, v0.s[1] @@ -977,40 +978,27 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s0, s9 ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: csel x21, x25, x9, gt -; CHECK-NEXT: csinv x22, x8, xzr, le -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csel x23, x25, x9, gt ; CHECK-NEXT: csinv x24, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x20 -; CHECK-NEXT: mov x3, x19 -; CHECK-NEXT: mov x4, x22 -; CHECK-NEXT: mov x5, x21 -; CHECK-NEXT: mov x6, x24 -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: mov x7, x23 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: mov x2, x22 +; CHECK-NEXT: mov x3, x21 +; CHECK-NEXT: mov x4, x24 +; CHECK-NEXT: mov x5, x23 +; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp s0, s9 -; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x1, x25, x9, gt -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ldp x30, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: mov x1, x19 +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: csel x7, x25, x9, gt +; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: csinv x6, x8, xzr, le +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptoui.sat.v4f32.v4i100(<4 x float> %f) ret <4 x i100> %x @@ -1019,13 +1007,13 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i128: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -1035,22 +1023,31 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: .cfi_offset w30, -64 ; CHECK-NEXT: .cfi_offset b8, -72 ; CHECK-NEXT: .cfi_offset b9, -80 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff +; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: mov s8, v0.s[1] -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel x8, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: fcmp s0, s9 ; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: csinv x19, x9, xzr, le +; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: csinv x21, x9, xzr, le +; CHECK-NEXT: csinv x22, x8, xzr, le ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: csinv x19, x9, xzr, le -; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov s8, v0.s[1] @@ -1059,40 +1056,27 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s0, s9 ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: csinv x21, x9, xzr, le -; CHECK-NEXT: csinv x22, x8, xzr, le +; CHECK-NEXT: csinv x23, x9, xzr, le +; CHECK-NEXT: csinv x24, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: mov x4, x23 +; CHECK-NEXT: mov x5, x24 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csinv x23, x9, xzr, le -; CHECK-NEXT: csinv x24, x8, xzr, le -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: mov x6, x23 -; CHECK-NEXT: fcmp s0, #0.0 -; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp s0, s9 -; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csinv x1, x9, xzr, le -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: csinv x6, x9, xzr, le +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: csinv x7, x8, xzr, le +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptoui.sat.v4f32.v4i128(<4 x float> %f) ret <4 x i128> %x @@ -1261,37 +1245,33 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 -; CHECK-NEXT: mov d8, v0.d[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov d0, d8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff -; CHECK-NEXT: fcmp d8, #0.0 ; CHECK-NEXT: mov x21, #68719476735 // =0xfffffffff ; CHECK-NEXT: fmov d9, x8 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov d8, v0.d[1] +; CHECK-NEXT: fcmp d0, #0.0 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: fcmp d0, d9 +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: csel x19, x21, x9, gt ; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x20 -; CHECK-NEXT: mov x3, x19 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: fcmp d8, #0.0 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp d0, d9 -; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x1, x21, x9, gt -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: mov x1, x19 +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: csel x3, x21, x9, gt ; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: csinv x2, x8, xzr, le ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f) @@ -1311,36 +1291,32 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 -; CHECK-NEXT: mov d8, v0.d[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov d0, d8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff -; CHECK-NEXT: fcmp d8, #0.0 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff ; CHECK-NEXT: fmov d9, x8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov d8, v0.d[1] +; CHECK-NEXT: fcmp d0, #0.0 ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: fcmp d0, d9 +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: csinv x19, x9, xzr, le ; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: fcmp d8, #0.0 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp d0, d9 +; CHECK-NEXT: csel x8, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csinv x1, x9, xzr, le -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: csinv x2, x9, xzr, le +; CHECK-NEXT: csinv x3, x8, xzr, le ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f) @@ -1570,7 +1546,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: .cfi_offset b8, -72 ; CHECK-NEXT: .cfi_offset b9, -80 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[2] +; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 @@ -1580,7 +1556,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff -; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -1589,9 +1565,8 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -1600,8 +1575,9 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: csinv x22, x8, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -1611,25 +1587,22 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x2, x22 -; CHECK-NEXT: mov x3, x21 -; CHECK-NEXT: mov x4, x20 -; CHECK-NEXT: mov x5, x19 -; CHECK-NEXT: mov x6, x24 -; CHECK-NEXT: mov x7, x23 +; CHECK-NEXT: mov x2, x20 +; CHECK-NEXT: mov x3, x19 +; CHECK-NEXT: mov x4, x22 +; CHECK-NEXT: mov x5, x21 ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov x0, x24 +; CHECK-NEXT: mov x1, x23 ; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: csel x7, x25, x9, gt ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x1, x25, x9, gt -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: csinv x6, x8, xzr, le ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f) @@ -1656,16 +1629,15 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: .cfi_offset b8, -72 ; CHECK-NEXT: .cfi_offset b9, -80 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 @@ -1676,7 +1648,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 @@ -1685,8 +1657,9 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: csinv x22, x8, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 @@ -1696,25 +1669,22 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) { ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: mov x6, x23 -; CHECK-NEXT: mov x7, x24 -; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: mov x4, x23 +; CHECK-NEXT: mov x5, x24 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: csel x8, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csinv x1, x9, xzr, le -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: csinv x6, x9, xzr, le ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: csinv x7, x8, xzr, le ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f) diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll index e93fb40897078..ed84ec2ba9eae 100644 --- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll @@ -473,52 +473,37 @@ define <33 x i8> @test_ldnp_v33i8(ptr %A) { define <4 x i65> @test_ldnp_v4i65(ptr %A) { ; CHECK-LABEL: test_ldnp_v4i65: ; CHECK: ; %bb.0: -; CHECK-NEXT: ldp x8, x9, [x0, #8] -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr x10, [x0, #24] +; CHECK-NEXT: ldp x8, x9, [x0, #16] ; CHECK-NEXT: ldrb w11, [x0, #32] -; CHECK-NEXT: and x1, x8, #0x1 -; CHECK-NEXT: extr x2, x9, x8, #1 -; CHECK-NEXT: extr x4, x10, x9, #2 -; CHECK-NEXT: mov.d v0[1], x1 -; CHECK-NEXT: extr x6, x11, x10, #3 -; CHECK-NEXT: ubfx x3, x9, #1, #1 -; CHECK-NEXT: ubfx x5, x10, #2, #1 +; CHECK-NEXT: ldp x0, x10, [x0] ; CHECK-NEXT: ubfx x7, x11, #3, #1 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: extr x4, x9, x8, #2 +; CHECK-NEXT: extr x6, x11, x9, #3 +; CHECK-NEXT: ubfx x3, x8, #1, #1 +; CHECK-NEXT: extr x2, x8, x10, #1 +; CHECK-NEXT: ubfx x5, x9, #2, #1 +; CHECK-NEXT: and x1, x10, #0x1 ; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_ldnp_v4i65: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldp x10, x9, [x0] -; CHECK-BE-NEXT: ldrb w8, [x0, #32] -; CHECK-BE-NEXT: ldp x12, x11, [x0, #16] -; CHECK-BE-NEXT: lsr x13, x10, #56 -; CHECK-BE-NEXT: orr x7, x8, x11, lsl #8 -; CHECK-BE-NEXT: extr x8, x10, x9, #56 -; CHECK-BE-NEXT: extr x11, x12, x11, #56 -; CHECK-BE-NEXT: lsr x14, x12, #56 -; CHECK-BE-NEXT: extr x15, x9, x12, #56 -; CHECK-BE-NEXT: lsr x10, x10, #59 -; CHECK-BE-NEXT: extr x1, x13, x8, #3 -; CHECK-BE-NEXT: lsr x8, x9, #56 -; CHECK-BE-NEXT: ubfx x12, x12, #57, #1 -; CHECK-BE-NEXT: ubfx x9, x9, #58, #1 -; CHECK-BE-NEXT: extr x5, x14, x11, #1 -; CHECK-BE-NEXT: and x11, x11, #0x1 -; CHECK-BE-NEXT: fmov d0, x10 -; CHECK-BE-NEXT: fmov d2, x12 -; CHECK-BE-NEXT: fmov d3, x11 -; CHECK-BE-NEXT: fmov d1, x9 -; CHECK-BE-NEXT: extr x3, x8, x15, #2 -; CHECK-BE-NEXT: mov v0.d[1], x1 -; CHECK-BE-NEXT: mov v2.d[1], x5 -; CHECK-BE-NEXT: mov v3.d[1], x7 -; CHECK-BE-NEXT: mov v1.d[1], x3 -; CHECK-BE-NEXT: fmov x0, d0 -; CHECK-BE-NEXT: fmov x4, d2 -; CHECK-BE-NEXT: fmov x6, d3 -; CHECK-BE-NEXT: fmov x2, d1 +; CHECK-BE-NEXT: ldp x9, x8, [x0] +; CHECK-BE-NEXT: ldrb w12, [x0, #32] +; CHECK-BE-NEXT: ldp x10, x11, [x0, #16] +; CHECK-BE-NEXT: extr x13, x9, x8, #56 +; CHECK-BE-NEXT: lsr x14, x9, #56 +; CHECK-BE-NEXT: lsr x16, x8, #56 +; CHECK-BE-NEXT: extr x15, x8, x10, #56 +; CHECK-BE-NEXT: orr x7, x12, x11, lsl #8 +; CHECK-BE-NEXT: extr x11, x10, x11, #56 +; CHECK-BE-NEXT: lsr x12, x10, #56 +; CHECK-BE-NEXT: extr x1, x14, x13, #3 +; CHECK-BE-NEXT: lsr x0, x9, #59 +; CHECK-BE-NEXT: ubfx x2, x8, #58, #1 +; CHECK-BE-NEXT: ubfx x4, x10, #57, #1 +; CHECK-BE-NEXT: extr x3, x16, x15, #2 +; CHECK-BE-NEXT: extr x5, x12, x11, #1 +; CHECK-BE-NEXT: and x6, x11, #0x1 ; CHECK-BE-NEXT: ret %lv = load <4 x i65>, ptr %A, align 8, !nontemporal !0 ret <4 x i65> %lv diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 84179d3694a9d..fa0447c2c5d79 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -483,21 +483,18 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; CHECK-LABEL: v2i128: ; CHECK: // %bb.0: +; CHECK-NEXT: adds x8, x0, x4 +; CHECK-NEXT: adcs x9, x1, x5 +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-NEXT: csel x0, x10, x8, vs +; CHECK-NEXT: csel x1, x11, x9, vs ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 ; CHECK-NEXT: asr x10, x9, #63 ; CHECK-NEXT: eor x11, x10, #0x8000000000000000 ; CHECK-NEXT: csel x2, x10, x8, vs ; CHECK-NEXT: csel x3, x11, x9, vs -; CHECK-NEXT: adds x8, x0, x4 -; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: csel x8, x10, x8, vs -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: csel x1, x11, x9, vs -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index aca9e58c1a1ba..d8b2762cf15e9 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -486,21 +486,18 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; CHECK-LABEL: v2i128: ; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x0, x4 +; CHECK-NEXT: sbcs x9, x1, x5 +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-NEXT: csel x0, x10, x8, vs +; CHECK-NEXT: csel x1, x11, x9, vs ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 ; CHECK-NEXT: asr x10, x9, #63 ; CHECK-NEXT: eor x11, x10, #0x8000000000000000 ; CHECK-NEXT: csel x2, x10, x8, vs ; CHECK-NEXT: csel x3, x11, x9, vs -; CHECK-NEXT: subs x8, x0, x4 -; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: csel x8, x10, x8, vs -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: csel x1, x11, x9, vs -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index 3c6c1f1618a95..afc0d8704ebac 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -477,17 +477,14 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; CHECK-LABEL: v2i128: ; CHECK: // %bb.0: +; CHECK-NEXT: adds x8, x0, x4 +; CHECK-NEXT: adcs x9, x1, x5 +; CHECK-NEXT: csinv x0, x8, xzr, lo +; CHECK-NEXT: csinv x1, x9, xzr, lo ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 ; CHECK-NEXT: csinv x2, x8, xzr, lo ; CHECK-NEXT: csinv x3, x9, xzr, lo -; CHECK-NEXT: adds x8, x0, x4 -; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: csinv x8, x8, xzr, lo -; CHECK-NEXT: csinv x1, x9, xzr, lo -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %z = call <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 363c12e12cb8b..dfcbe96ea948a 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -475,17 +475,14 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; CHECK-LABEL: v2i128: ; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x0, x4 +; CHECK-NEXT: sbcs x9, x1, x5 +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: csel x1, xzr, x9, lo ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 ; CHECK-NEXT: csel x2, xzr, x8, lo ; CHECK-NEXT: csel x3, xzr, x9, lo -; CHECK-NEXT: subs x8, x0, x4 -; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: csel x8, xzr, x8, lo -; CHECK-NEXT: csel x1, xzr, x9, lo -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %z = call <2 x i128> @llvm.usub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll index 570834fb67010..81b6a6940a7d6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -2318,43 +2318,41 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: strb.w r4, [r9, #49] ; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s17, s22 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: lsrl r0, r1, #28 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 ; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: vmov r1, s1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: strd r0, r1, [r9, #16] +; CHECK-NEXT: it lt +; CHECK-NEXT: mvnlt r3, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r0, r1, #28 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: strd r0, r1, [r9, #16] +; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: and r1, r3, #15 ; CHECK-NEXT: lsrl r2, r1, #28 ; CHECK-NEXT: strb.w r2, [r9, #24] @@ -3687,268 +3685,257 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: sub sp, #48 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI40_0 -; CHECK-NEXT: vmov r5, r7, d8 -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vmov r9, r3, d0 -; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: vmov r10, r9, d8 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vmov r7, r3, d0 +; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: str r7, [sp, #44] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vldr d0, .LCPI40_1 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: vmov r8, r3, d0 ; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: mov r11, r3 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill ; CHECK-NEXT: csel r4, r2, r4, ne -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r4, [r10, #8] -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: str.w r9, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: str r4, [r6, #8] +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: str.w r8, [sp, #32] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: csel r6, r1, r0, ne -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: csel r7, r1, r0, ne +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: movne.w r7, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: str r6, [r0, #4] -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: str r7, [r6, #4] +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r10, r8 +; CHECK-NEXT: str.w r11, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: csel r6, r1, r0, ne -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: csel r7, r1, r0, ne +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: str r7, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: str.w r10, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str.w r9, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: vmov r9, r8, d9 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: str r6, [r0] -; CHECK-NEXT: ldr r6, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: str r7, [r6] +; CHECK-NEXT: ldr r6, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: ldr.w r10, [sp, #32] @ 4-byte Reload ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r4, r10 -; CHECK-NEXT: str.w r10, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: mov r5, r11 -; CHECK-NEXT: str.w r11, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r11, r0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r10, r3 -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str r1, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: cmp.w r11, #0 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: it eq -; CHECK-NEXT: mvneq r10, #7 +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: csel r11, r1, r11, ne +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #7 +; CHECK-NEXT: movne.w r11, #-1 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r11, #0 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: ldr r5, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: csel r10, r4, r0, ne +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r10, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r10, #0 ; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload ; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: ldr r3, [sp, #40] @ 4-byte Reload ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: str.w r10, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: lsrl r10, r11, #28 ; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r4, r10 +; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r11, r4, lsl #4 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: vmov r0, s1 ; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: mov r5, r6 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: strd r10, r0, [r6, #16] ; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r11, r1, r0, ne -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #-1 -; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: it eq +; CHECK-NEXT: mvneq r0, #7 +; CHECK-NEXT: cmp.w r10, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r4, r11 -; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: lsrl r4, r1, #28 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: str r1, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: movne r0, #7 ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: mov r7, r6 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: csel r6, r1, r0, ne -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: and r1, r10, #15 -; CHECK-NEXT: ldr r2, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r0, r6, lsl #4 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strd r4, r0, [r2, #16] -; CHECK-NEXT: mov r8, r2 -; CHECK-NEXT: strb r6, [r2, #24] -; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: and r1, r5, #15 +; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: lsrl r4, r1, #28 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: strb r4, [r6, #24] ; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: ldr r3, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: it eq ; CHECK-NEXT: mvneq r0, #7 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r0, #7 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: and r0, r4, #15 -; CHECK-NEXT: orr.w r0, r0, r11, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; CHECK-NEXT: str.w r0, [r8, #12] ; CHECK-NEXT: add sp, #48 ; CHECK-NEXT: vpop {d8, d9} @@ -5433,7 +5420,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r11, r0 ; CHECK-NEXT: vcvtb.f32.f16 s21, s19 ; CHECK-NEXT: vcvtt.f32.f16 s24, s19 ; CHECK-NEXT: vmov r0, s21 @@ -5442,13 +5429,13 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vcvtb.f32.f16 s30, s18 ; CHECK-NEXT: vldr s20, .LCPI50_2 ; CHECK-NEXT: vmov r8, s24 -; CHECK-NEXT: vmov r4, s26 +; CHECK-NEXT: vmov r9, s26 ; CHECK-NEXT: vcvtt.f32.f16 s22, s18 ; CHECK-NEXT: vmov r6, s28 -; CHECK-NEXT: vmov r5, s30 +; CHECK-NEXT: vmov r7, s30 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vldr s18, .LCPI50_3 -; CHECK-NEXT: mov r7, r3 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: vcmp.f32 s21, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s21, s20 @@ -5464,7 +5451,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s21, s20 -; CHECK-NEXT: str.w r2, [r9, #83] +; CHECK-NEXT: str.w r2, [r11, #83] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -5476,7 +5463,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #79] +; CHECK-NEXT: str.w r1, [r11, #79] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s21, s20 @@ -5487,11 +5474,11 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r9, #75] -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: str.w r0, [r11, #75] +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: it lt @@ -5506,7 +5493,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: str.w r2, [r9, #58] +; CHECK-NEXT: str.w r2, [r11, #58] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -5518,7 +5505,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #54] +; CHECK-NEXT: str.w r1, [r11, #54] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s30, s20 @@ -5529,7 +5516,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r9, #50] +; CHECK-NEXT: str.w r0, [r11, #50] ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s28, s18 @@ -5548,7 +5535,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: str.w r2, [r9, #33] +; CHECK-NEXT: str.w r2, [r11, #33] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -5560,7 +5547,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #29] +; CHECK-NEXT: str.w r1, [r11, #29] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s28, s20 @@ -5571,8 +5558,8 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r9, #25] -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: str.w r0, [r11, #25] +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s26, s18 ; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill @@ -5590,7 +5577,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: str.w r2, [r9, #8] +; CHECK-NEXT: str.w r2, [r11, #8] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -5602,7 +5589,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #4] +; CHECK-NEXT: str.w r1, [r11, #4] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s26, s20 @@ -5613,7 +5600,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r9] +; CHECK-NEXT: str.w r0, [r11] ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s24, s18 @@ -5633,69 +5620,68 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r7, #7 +; CHECK-NEXT: mvnlt r5, #7 ; CHECK-NEXT: vcmp.f32 s21, s20 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #7 +; CHECK-NEXT: movgt r5, #7 ; CHECK-NEXT: vcmp.f32 s21, s21 -; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r8, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: and r0, r7, #15 +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: and r0, r5, #15 ; CHECK-NEXT: orr.w r1, r0, r6, lsl #4 ; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: str.w r1, [r9, #87] +; CHECK-NEXT: str.w r1, [r11, #87] ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s22, s18 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s22, s22 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movgt.w r10, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s30, s18 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs.w r10, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r5, #7 +; CHECK-NEXT: mvnlt r7, #7 ; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s30, s30 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r5, #7 +; CHECK-NEXT: movgt r7, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: and r0, r5, #15 -; CHECK-NEXT: orr.w r0, r0, r8, lsl #4 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: and r0, r7, #15 +; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 ; CHECK-NEXT: vcvtt.f32.f16 s30, s17 -; CHECK-NEXT: str.w r0, [r9, #62] +; CHECK-NEXT: str.w r0, [r11, #62] ; CHECK-NEXT: vmov r0, s30 -; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcmp.f32 s30, s30 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 @@ -5715,8 +5701,9 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str.w r0, [r9, #37] +; CHECK-NEXT: str.w r0, [r11, #37] ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s16, s18 @@ -5732,26 +5719,26 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: vcmp.f32 s26, s18 -; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r5, #7 +; CHECK-NEXT: mvnlt r7, #7 ; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r5, #7 +; CHECK-NEXT: movgt r7, #7 ; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: vcmp.f32 s24, s18 -; CHECK-NEXT: and r5, r5, #15 +; CHECK-NEXT: and r7, r7, #15 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: orr.w r5, r5, r0, lsl #4 -; CHECK-NEXT: str.w r5, [r9, #12] +; CHECK-NEXT: orr.w r7, r7, r0, lsl #4 +; CHECK-NEXT: str.w r7, [r11, #12] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: b.w .LBB50_3 ; CHECK-NEXT: .p2align 2 @@ -5766,181 +5753,176 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: .LBB50_3: ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s18 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt.w r8, #0 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r6, r11, #28 +; CHECK-NEXT: lsrl r6, r9, #28 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: orr.w r5, r11, r10, lsl #4 -; CHECK-NEXT: str.w r5, [r9, #95] -; CHECK-NEXT: str.w r6, [r9, #91] +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: orr.w r7, r9, r8, lsl #4 +; CHECK-NEXT: str.w r7, [r11, #95] +; CHECK-NEXT: str.w r6, [r11, #91] ; CHECK-NEXT: vcmp.f32 s24, s18 -; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r6, #7 +; CHECK-NEXT: mvnlt r7, #7 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r6, #7 +; CHECK-NEXT: movgt r7, #7 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: and r5, r6, #15 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: and r7, r7, #15 ; CHECK-NEXT: vcmp.f32 s22, s18 -; CHECK-NEXT: lsrl r10, r5, #28 +; CHECK-NEXT: lsrl r8, r7, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: strb.w r10, [r9, #99] +; CHECK-NEXT: strb.w r8, [r11, #99] ; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r7, #7 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s22 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #7 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: vcmp.f32 s22, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s18 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: vcmp.f32 s22, s18 +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: lsrl r10, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: movgt.w r6, #-1 ; CHECK-NEXT: vcmp.f32 s22, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r4, r7 +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 +; CHECK-NEXT: str.w r7, [r11, #70] +; CHECK-NEXT: str.w r10, [r11, #66] ; CHECK-NEXT: vcmp.f32 s22, s18 -; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov r5, s1 +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: mvnlt r7, #7 ; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: lsrl r8, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: movgt r7, #7 ; CHECK-NEXT: vcmp.f32 s22, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: orr.w r6, r5, r4, lsl #4 +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: and r5, r7, #15 -; CHECK-NEXT: lsrl r4, r5, #28 -; CHECK-NEXT: str.w r6, [r9, #70] -; CHECK-NEXT: str.w r8, [r9, #66] ; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: strb.w r4, [r9, #74] +; CHECK-NEXT: lsrl r6, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: mov r5, r4 +; CHECK-NEXT: strb.w r6, [r11, #74] ; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r4, #7 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r4, #7 +; CHECK-NEXT: movgt.w r5, #-1 ; CHECK-NEXT: vcmp.f32 s30, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload ; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: lsrl r4, r5, #28 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r6, #-1 ; CHECK-NEXT: vcmp.f32 s30, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r7, r4 +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 +; CHECK-NEXT: str.w r7, [r11, #45] +; CHECK-NEXT: str.w r4, [r11, #41] ; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: ldr.w r12, [sp] @ 4-byte Reload -; CHECK-NEXT: vmov r5, s1 -; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: mvnlt r7, #7 ; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r12, r5, #28 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt r7, #7 ; CHECK-NEXT: vcmp.f32 s30, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 -; CHECK-NEXT: and r5, r4, #15 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: and r5, r7, #15 ; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: lsrl r6, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str.w r7, [r9, #45] -; CHECK-NEXT: str.w r12, [r9, #41] -; CHECK-NEXT: strb.w r6, [r9, #49] +; CHECK-NEXT: strb.w r6, [r11, #49] ; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: lsrl r0, r1, #28 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 ; CHECK-NEXT: vcmp.f32 s16, s18 -; CHECK-NEXT: vmov r1, s1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: strd r0, r1, [r11, #16] +; CHECK-NEXT: it lt +; CHECK-NEXT: mvnlt r3, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r0, r1, #28 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: strd r0, r1, [r9, #16] +; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: and r1, r3, #15 ; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r9, #24] +; CHECK-NEXT: strb.w r2, [r11, #24] ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll index 2b6d0da531704..5ab184a066e49 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -1879,26 +1879,16 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: vcmp.f32 s17, #0 ; CHECK-NEXT: lsrl r6, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: strb.w r6, [r8, #49] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 ; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: vmov r1, s1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 @@ -1906,7 +1896,15 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 +; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: strd r0, r1, [r8, #16] +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #15 ; CHECK-NEXT: and r1, r3, #15 ; CHECK-NEXT: lsrl r2, r1, #28 ; CHECK-NEXT: strb.w r2, [r8, #24] @@ -2925,195 +2923,191 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI40_0 ; CHECK-NEXT: vmov r6, r5, d8 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r2, r9, d0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r2, r7, d0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r9, r2 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vldr d0, .LCPI40_1 -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r11, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: vmov r11, r3, d0 -; CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: str r2, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: mov r10, r3 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: str r5, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cmp.w r8, #0 ; CHECK-NEXT: strd r1, r0, [sp, #20] @ 8-byte Folded Spill -; CHECK-NEXT: csel r0, r2, r4, ne +; CHECK-NEXT: csel r0, r2, r8, ne ; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: cmp.w r11, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str.w r0, [r8, #8] +; CHECK-NEXT: str r0, [r4, #8] ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r3, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r11, r6 +; CHECK-NEXT: ldr r6, [sp, #44] @ 4-byte Reload ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r7, r6 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cmp.w r8, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str.w r0, [r8, #4] -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: str r0, [r4, #4] +; CHECK-NEXT: mov r0, r11 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r6, r8 -; CHECK-NEXT: strd r8, r7, [sp, #28] @ 8-byte Folded Spill +; CHECK-NEXT: strd r4, r11, [sp, #28] @ 8-byte Folded Spill +; CHECK-NEXT: str r5, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r11 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r5, r11 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: vmov r8, r11, d9 ; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r6] -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r5, r9 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: mov r6, r5 -; CHECK-NEXT: str r5, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: mov r6, r10 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: strd r0, r2, [sp, #20] @ 8-byte Folded Spill -; CHECK-NEXT: csel r0, r3, r7, ne -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #15 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: add.w r12, sp, #16 +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: stm.w r12, {r0, r2, r3} @ 12-byte Folded Spill +; CHECK-NEXT: csel r9, r1, r10, ne ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: mov r7, r5 +; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r10, r4 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: str r6, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r7 -; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r6, r10 -; CHECK-NEXT: str.w r10, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: lsrl r4, r9, #28 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: csel r6, r1, r0, ne +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r10, r4 -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r9, r6, lsl #4 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: lsrl r10, r5, #28 +; CHECK-NEXT: strd r4, r0, [r5, #16] +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ldr.w r9, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: ldr.w r11, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r9, r7 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: orr.w r1, r5, r0, lsl #4 -; CHECK-NEXT: strd r10, r1, [r2, #16] -; CHECK-NEXT: mov r8, r2 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: strb r0, [r2, #24] +; CHECK-NEXT: movne r0, #15 +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: strb r6, [r5, #24] ; CHECK-NEXT: ldr r6, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: ldrd r3, r2, [sp, #8] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r0, #15 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r4, lsl #4 -; CHECK-NEXT: str.w r0, [r8, #12] +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 +; CHECK-NEXT: str r0, [r5, #12] ; CHECK-NEXT: add sp, #48 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -4216,7 +4210,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: vcvtb.f32.f16 s30, s19 ; CHECK-NEXT: vcvtb.f32.f16 s28, s18 ; CHECK-NEXT: vmov r0, s30 @@ -4224,14 +4218,14 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vcvtb.f32.f16 s24, s16 ; CHECK-NEXT: vcvtb.f32.f16 s26, s17 ; CHECK-NEXT: vldr s20, .LCPI50_1 -; CHECK-NEXT: vmov r8, s22 -; CHECK-NEXT: vmov r5, s28 +; CHECK-NEXT: vmov r4, s22 +; CHECK-NEXT: vmov r7, s28 ; CHECK-NEXT: vcvtt.f32.f16 s18, s18 -; CHECK-NEXT: vmov r4, s24 +; CHECK-NEXT: vmov r9, s24 ; CHECK-NEXT: vmov r6, s26 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s30, #0 -; CHECK-NEXT: mov r7, r3 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: it lt @@ -4242,7 +4236,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: str.w r2, [r9, #83] +; CHECK-NEXT: str.w r2, [r8, #83] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -4250,18 +4244,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #79] +; CHECK-NEXT: str.w r1, [r8, #79] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r9, #75] -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: str.w r0, [r8, #75] +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: it lt @@ -4272,7 +4266,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: str.w r2, [r9, #58] +; CHECK-NEXT: str.w r2, [r8, #58] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -4280,14 +4274,14 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #54] +; CHECK-NEXT: str.w r1, [r8, #54] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r9, #50] +; CHECK-NEXT: str.w r0, [r8, #50] ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s26, #0 @@ -4302,7 +4296,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: str.w r2, [r9, #33] +; CHECK-NEXT: str.w r2, [r8, #33] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -4310,18 +4304,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #29] +; CHECK-NEXT: str.w r1, [r8, #29] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r9, #25] -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: str.w r0, [r8, #25] +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: it lt @@ -4332,7 +4326,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: str.w r2, [r9, #8] +; CHECK-NEXT: str.w r2, [r8, #8] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -4340,21 +4334,21 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #4] +; CHECK-NEXT: str.w r1, [r8, #4] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r9] -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: str.w r0, [r8] +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -4363,177 +4357,174 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: movgt.w r6, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: and r0, r7, #15 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: movgt r5, #15 +; CHECK-NEXT: and r0, r5, #15 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: orr.w r1, r0, r6, lsl #4 ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: str.w r1, [r9, #87] +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: str.w r1, [r8, #87] ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s28, #0 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movgt.w r10, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r5, #15 -; CHECK-NEXT: and r0, r5, #15 +; CHECK-NEXT: movgt r7, #15 +; CHECK-NEXT: and r0, r7, #15 ; CHECK-NEXT: vcvtt.f32.f16 s28, s17 -; CHECK-NEXT: orr.w r0, r0, r8, lsl #4 -; CHECK-NEXT: str.w r0, [r9, #62] +; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 +; CHECK-NEXT: str.w r0, [r8, #62] ; CHECK-NEXT: vmov r0, s28 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r0, #15 ; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str.w r0, [r9, #37] +; CHECK-NEXT: str.w r0, [r8, #37] ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r4, #15 -; CHECK-NEXT: and r5, r4, #15 +; CHECK-NEXT: movgt r7, #15 +; CHECK-NEXT: and r7, r7, #15 ; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: orr.w r5, r5, r0, lsl #4 +; CHECK-NEXT: orr.w r7, r7, r0, lsl #4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r5, [r9, #12] +; CHECK-NEXT: str.w r7, [r8, #12] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: lsrl r6, r11, #28 +; CHECK-NEXT: lsrl r6, r9, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: orr.w r5, r11, r10, lsl #4 -; CHECK-NEXT: str.w r5, [r9, #95] -; CHECK-NEXT: str.w r6, [r9, #91] +; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: orr.w r7, r9, r4, lsl #4 +; CHECK-NEXT: str.w r7, [r8, #95] +; CHECK-NEXT: str.w r6, [r8, #91] ; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r6, #15 -; CHECK-NEXT: and r5, r6, #15 +; CHECK-NEXT: movgt r7, #15 +; CHECK-NEXT: and r7, r7, #15 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: lsrl r10, r5, #28 +; CHECK-NEXT: lsrl r4, r7, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: strb.w r10, [r9, #99] +; CHECK-NEXT: strb.w r4, [r8, #99] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #15 +; CHECK-NEXT: movgt.w r5, #-1 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: lsrl r10, r5, #28 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r7 +; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 +; CHECK-NEXT: str.w r7, [r8, #70] +; CHECK-NEXT: str.w r10, [r8, #66] ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: vmov r5, s1 -; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r8, r5, #28 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: orr.w r6, r5, r4, lsl #4 +; CHECK-NEXT: movgt r7, #15 ; CHECK-NEXT: and r5, r7, #15 -; CHECK-NEXT: lsrl r4, r5, #28 -; CHECK-NEXT: str.w r6, [r9, #70] -; CHECK-NEXT: str.w r8, [r9, #66] ; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: strb.w r4, [r9, #74] +; CHECK-NEXT: lsrl r6, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vcmp.f32 s28, s20 +; CHECK-NEXT: strb.w r6, [r8, #74] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: mov r12, r7 +; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload ; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: lsrl r4, r11, #28 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: b.w .LBB50_2 ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: @@ -4541,46 +4532,34 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: .long 0x717fffff @ float 1.26765052E+30 ; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: vmov q0[3], q0[1], r7, r12 -; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload -; CHECK-NEXT: vmov r5, s1 -; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: orr.w r7, r11, r6, lsl #4 +; CHECK-NEXT: str.w r7, [r8, #45] +; CHECK-NEXT: str.w r4, [r8, #41] ; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: lsrl r4, r5, #28 +; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 -; CHECK-NEXT: and r5, r12, #15 +; CHECK-NEXT: movgt r7, #15 +; CHECK-NEXT: and r5, r7, #15 ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: lsrl r6, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str.w r7, [r9, #45] -; CHECK-NEXT: str.w r4, [r9, #41] -; CHECK-NEXT: strb.w r6, [r9, #49] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: strb.w r6, [r8, #49] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: vmov r1, s1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 @@ -4588,10 +4567,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: strd r0, r1, [r9, #16] +; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: strd r0, r1, [r8, #16] +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #15 ; CHECK-NEXT: and r1, r3, #15 ; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r9, #24] +; CHECK-NEXT: strb.w r2, [r8, #24] ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll index f2c8440b177d8..55a621eaf4c9c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll @@ -207,10 +207,8 @@ define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-NEXT: sbcs.w r12, r3, r1 ; CHECK-NEXT: cset r12, lt ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr %c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) @@ -486,10 +484,8 @@ define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-NEXT: sbcs.w r12, r3, r1 ; CHECK-NEXT: cset r12, lo ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr %c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) @@ -772,10 +768,8 @@ define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-NEXT: sbcs.w r12, r1, r3 ; CHECK-NEXT: cset r12, lt ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr %c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) @@ -1051,10 +1045,8 @@ define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-NEXT: sbcs.w r12, r1, r3 ; CHECK-NEXT: cset r12, lo ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 -; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: csel r1, r1, r3, ne ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr %c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll index d80dd5a673e20..85317e1fe4626 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -535,22 +535,20 @@ define void @vst3_v2i8(ptr %src, ptr %dst) { ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: ldrb r2, [r0] -; CHECK-NEXT: mov r4, sp -; CHECK-NEXT: ldrb r3, [r0, #1] -; CHECK-NEXT: ldrb.w r12, [r0, #2] -; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 -; CHECK-NEXT: ldrb.w lr, [r0, #3] -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: ldrb r5, [r0, #5] +; CHECK-NEXT: mov r5, sp +; CHECK-NEXT: ldrb r3, [r0, #2] ; CHECK-NEXT: vmov.16 q0[0], r2 +; CHECK-NEXT: ldrb.w r12, [r0, #1] +; CHECK-NEXT: ldrb.w lr, [r0, #3] +; CHECK-NEXT: vmov.16 q0[1], r3 +; CHECK-NEXT: ldrb r4, [r0, #5] ; CHECK-NEXT: ldrb r0, [r0, #4] -; CHECK-NEXT: vmov.16 q0[1], r12 ; CHECK-NEXT: vmov.16 q0[2], r0 ; CHECK-NEXT: add r0, sp, #8 -; CHECK-NEXT: vmov.16 q0[3], r3 +; CHECK-NEXT: vmov.16 q0[3], r12 ; CHECK-NEXT: vmov.16 q0[4], lr -; CHECK-NEXT: vmov.16 q0[5], r5 -; CHECK-NEXT: vstrb.16 q0, [r4] +; CHECK-NEXT: vmov.16 q0[5], r4 +; CHECK-NEXT: vstrb.16 q0, [r5] ; CHECK-NEXT: vstrb.16 q0, [r0] ; CHECK-NEXT: vldrh.u32 q0, [r0] ; CHECK-NEXT: ldr r2, [sp] diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll index f3a65c40031af..b36904495e878 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll @@ -238,27 +238,23 @@ define void @vst4_v2i16(ptr %src, ptr %dst) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: ldrh r3, [r0, #2] -; CHECK-NEXT: ldrh r2, [r0] -; CHECK-NEXT: ldrh.w r12, [r0, #10] -; CHECK-NEXT: ldrh.w lr, [r0, #4] -; CHECK-NEXT: vmov q1[2], q1[0], r2, r3 -; CHECK-NEXT: ldrh r4, [r0, #12] -; CHECK-NEXT: ldrh r5, [r0, #6] +; CHECK-NEXT: ldrh r2, [r0, #4] +; CHECK-NEXT: ldrh r3, [r0, #8] +; CHECK-NEXT: ldrh.w r12, [r0, #12] +; CHECK-NEXT: ldrh.w lr, [r0, #2] +; CHECK-NEXT: ldrh r4, [r0, #6] +; CHECK-NEXT: ldrh r5, [r0, #10] ; CHECK-NEXT: ldrh r6, [r0, #14] -; CHECK-NEXT: ldrh r0, [r0, #8] -; CHECK-NEXT: vmov q0[2], q0[0], r0, r12 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov.16 q1[0], r0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q1[1], lr -; CHECK-NEXT: vmov.16 q1[2], r0 -; CHECK-NEXT: vmov.16 q1[3], r4 -; CHECK-NEXT: vmov.16 q1[4], r3 -; CHECK-NEXT: vmov.16 q1[5], r5 -; CHECK-NEXT: vmov.16 q1[6], r12 -; CHECK-NEXT: vmov.16 q1[7], r6 -; CHECK-NEXT: vstrh.16 q1, [r1] +; CHECK-NEXT: ldrh r0, [r0] +; CHECK-NEXT: vmov.16 q0[0], r0 +; CHECK-NEXT: vmov.16 q0[1], r2 +; CHECK-NEXT: vmov.16 q0[2], r3 +; CHECK-NEXT: vmov.16 q0[3], r12 +; CHECK-NEXT: vmov.16 q0[4], lr +; CHECK-NEXT: vmov.16 q0[5], r4 +; CHECK-NEXT: vmov.16 q0[6], r5 +; CHECK-NEXT: vmov.16 q0[7], r6 +; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %l1 = load <2 x i16>, ptr %src, align 4 @@ -475,26 +471,22 @@ define void @vst4_v2i8(ptr %src, ptr %dst) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: ldrb r4, [r0, #5] -; CHECK-NEXT: ldrb r5, [r0, #4] -; CHECK-NEXT: ldrb r2, [r0] -; CHECK-NEXT: ldrb r3, [r0, #1] -; CHECK-NEXT: vmov q0[2], q0[0], r5, r4 -; CHECK-NEXT: vmov r5, s0 -; CHECK-NEXT: ldrb.w r12, [r0, #2] -; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 -; CHECK-NEXT: ldrb.w lr, [r0, #3] -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: ldrb r6, [r0, #7] -; CHECK-NEXT: vmov.16 q0[0], r2 -; CHECK-NEXT: ldrb r0, [r0, #6] -; CHECK-NEXT: vmov.16 q0[1], r12 -; CHECK-NEXT: vmov.16 q0[2], r5 -; CHECK-NEXT: vmov.16 q0[3], r0 -; CHECK-NEXT: vmov.16 q0[4], r3 -; CHECK-NEXT: vmov.16 q0[5], lr -; CHECK-NEXT: vmov.16 q0[6], r4 -; CHECK-NEXT: vmov.16 q0[7], r6 +; CHECK-NEXT: ldrb r4, [r0] +; CHECK-NEXT: ldrb r6, [r0, #2] +; CHECK-NEXT: vmov.16 q0[0], r4 +; CHECK-NEXT: ldrb r2, [r0, #4] +; CHECK-NEXT: vmov.16 q0[1], r6 +; CHECK-NEXT: ldrb r3, [r0, #6] +; CHECK-NEXT: vmov.16 q0[2], r2 +; CHECK-NEXT: ldrb r5, [r0, #1] +; CHECK-NEXT: vmov.16 q0[3], r3 +; CHECK-NEXT: ldrb.w r12, [r0, #5] +; CHECK-NEXT: ldrb.w lr, [r0, #7] +; CHECK-NEXT: vmov.16 q0[4], r5 +; CHECK-NEXT: ldrb r0, [r0, #3] +; CHECK-NEXT: vmov.16 q0[5], r0 +; CHECK-NEXT: vmov.16 q0[6], r12 +; CHECK-NEXT: vmov.16 q0[7], lr ; CHECK-NEXT: vstrb.16 q0, [r1] ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: