diff --git a/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s b/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s index 296400e..6632202 100644 --- a/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s +++ b/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s @@ -7572,11 +7572,22 @@ vorbis_encode_setup_managed: # @vorbis_encode_setup_managed .dword 0xc0f869f000000000 # double -99999 .LCPI41_3: .dword 0x408f400000000000 # double 1000 -.LCPI41_4: +.LCPI41_5: .dword 0x3feccccccccccccd # double 0.90000000000000002 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI41_4: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI41_5: +.LCPI41_6: .dword 0xc12e847e00000000 # double -999999 .dword 0x412e847e00000000 # double 999999 .text @@ -7599,14 +7610,14 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init st.d $s8, $sp, 120 # 8-byte Folded Spill fst.d $fs0, $sp, 112 # 8-byte Folded Spill fst.d $fs1, $sp, 104 # 8-byte Folded Spill - move $s2, $a0 - ld.d $s8, $a0, 48 + move $s8, $a0 + ld.d $s7, $a0, 48 addi.w $a0, $zero, -131 - beqz $s8, .LBB41_190 + beqz $s7, .LBB41_190 # %bb.1: lu12i.w $fp, 1 ori $a2, $fp, 1552 - fldx.d $fa1, $s8, $a2 + fldx.d $fa1, $s7, $a2 pcalau12i $a1, %pc_hi20(.LCPI41_0) fld.d $fa0, $a1, %pc_lo12(.LCPI41_0) fcmp.clt.d $fcc0, $fa0, $fa1 @@ -7617,10 +7628,10 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB41_4 .LBB41_3: # %.sink.split - fstx.d $fa0, $s8, $a2 + fstx.d $fa0, $s7, $a2 .LBB41_4: ori $s0, $fp, 1568 - fldx.d $fa1, $s8, $s0 + fldx.d $fa1, $s7, $s0 movgr2fr.d $fa0, $zero fcmp.clt.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB41_6 @@ -7630,60 +7641,60 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB41_7 .LBB41_6: # %.sink.split317 - fstx.d $fa0, $s8, $s0 + fstx.d $fa0, $s7, $s0 .LBB41_7: - ldptr.d $s6, $s8, 5512 + ldptr.d $s6, $s7, 5512 beqz $s6, .LBB41_190 # %bb.8: st.d $a2, $sp, 72 # 8-byte Folded Spill ori $a0, $zero, 1 - stptr.w $a0, $s8, 5520 + stptr.w $a0, $s7, 5520 ori $a0, $fp, 1432 st.d $a0, $sp, 8 # 8-byte Folded Spill - fldx.d $fa0, $s8, $a0 + fldx.d $fa0, $s7, $a0 ld.d $a0, $s6, 48 ld.d $a1, $s6, 56 - ld.d $a2, $s2, 48 + ld.d $a2, $s8, 48 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a3, $fa0 slli.d $a3, $a3, 2 ldx.w $a0, $a0, $a3 ldx.w $a1, $a1, $a3 - ldptr.w $a3, $s8, 5624 + ldptr.w $a3, $s7, 5624 st.d $a3, $sp, 80 # 8-byte Folded Spill st.d $a0, $a2, 0 st.d $a1, $a2, 8 - ld.d $s1, $s8, 0 - ld.d $s3, $s8, 8 + ld.d $s1, $s7, 0 + ld.d $s2, $s7, 8 ori $a0, $fp, 1448 st.d $a0, $sp, 56 # 8-byte Folded Spill - fldx.d $fa0, $s8, $a0 + fldx.d $fa0, $s7, $a0 ld.d $a2, $s6, 272 ld.d $a3, $s6, 280 ld.d $a4, $s6, 288 - ld.d $a0, $s2, 48 + ld.d $a0, $s8, 48 move $a1, $zero pcaddu18i $ra, %call36(vorbis_encode_floor_setup) jirl $ra, $ra, 0 st.d $s1, $sp, 96 # 8-byte Folded Spill - st.d $s3, $sp, 64 # 8-byte Folded Spill - beq $s1, $s3, .LBB41_10 + st.d $s2, $sp, 64 # 8-byte Folded Spill + beq $s1, $s2, .LBB41_10 # %bb.9: ori $a0, $fp, 1440 - fldx.d $fa0, $s8, $a0 + fldx.d $fa0, $s7, $a0 ld.d $a2, $s6, 272 ld.d $a3, $s6, 280 ld.d $a4, $s6, 296 - ld.d $a0, $s2, 48 + ld.d $a0, $s8, 48 ori $a1, $zero, 1 pcaddu18i $ra, %call36(vorbis_encode_floor_setup) jirl $ra, $ra, 0 .LBB41_10: ori $a0, $fp, 1576 - fldx.d $fa0, $s8, $a0 + fldx.d $fa0, $s7, $a0 ld.d $fp, $s6, 248 ld.d $s1, $s6, 256 - ld.d $s3, $s2, 48 + ld.d $s3, $s8, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a1, $fa1 movgr2fr.w $fa1, $a1 @@ -7697,8 +7708,8 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init alsl.d $s5, $a1, $s1, 3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 - ori $s7, $zero, 492 - mul.d $a1, $a1, $s7 + ori $s2, $zero, 492 + mul.d $a1, $a1, $s2 add.d $a1, $fp, $a1 ori $a2, $zero, 492 pcaddu18i $ra, %call36(memcpy) @@ -7722,7 +7733,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init and $a1, $a1, $a2 sub.w $a0, $a0, $a1 movgr2cf $fcc0, $a1 - mul.d $a0, $a0, $s7 + mul.d $a0, $a0, $s2 add.d $a0, $fp, $a0 lu12i.w $a4, 1 fld.s $fa3, $a0, 4 @@ -7812,13 +7823,14 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ori $a0, $a4, 916 fstx.s $fa0, $s3, $a0 ld.d $a1, $s6, 264 - ld.d $a0, $s2, 48 + ld.d $a0, $s8, 48 pcalau12i $a2, %pc_hi20(.LCPI41_3) st.d $a2, $sp, 88 # 8-byte Folded Spill + move $a7, $s8 beqz $a1, .LBB41_14 # %bb.11: ori $a2, $a4, 1536 - fldx.d $fa0, $s8, $a2 + fldx.d $fa0, $s7, $a2 fcvt.s.d $fa0, $fa0 ftintrz.w.s $fa1, $fa0 movfr2gr.s $a2, $fa1 @@ -7845,12 +7857,12 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init stptr.w $a3, $a0, 5312 ld.d $a3, $a1, 60 stptr.d $a3, $a0, 5256 - ldptr.w $a3, $s8, 5560 + ldptr.w $a3, $s7, 5560 movgr2fr.w $fa1, $a2 ffint.s.w $fa1, $fa1 fsub.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 - ld.d $s7, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 96 # 8-byte Folded Reload ld.d $s1, $sp, 56 # 8-byte Folded Reload beqz $a3, .LBB41_15 # %bb.12: # %.preheader104.i @@ -7879,7 +7891,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fcvt.d.s $fa6, $fa6 fmul.d $fa6, $fa0, $fa6 fadd.d $fa5, $fa5, $fa6 - ld.d $a5, $s2, 8 + ld.d $a5, $a7, 8 fcvt.s.d $fa5, $fa5 fcvt.d.s $fa6, $fa5 fmul.d $fa6, $fa6, $fa3 @@ -7891,7 +7903,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init movfr2gr.s $a5, $fa7 add.d $a6, $a0, $a2 st.w $a5, $a6, 60 - ld.d $a5, $s2, 8 + ld.d $a5, $a7, 8 movgr2fr.d $fa7, $a5 ffint.d.l $fa7, $fa7 fdiv.d $fa6, $fa6, $fa7 @@ -7909,7 +7921,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fcvt.d.s $fa6, $fa6 fmul.d $fa6, $fa0, $fa6 fadd.d $fa5, $fa5, $fa6 - ld.d $a4, $s2, 8 + ld.d $a4, $a7, 8 fcvt.s.d $fa5, $fa5 fcvt.d.s $fa5, $fa5 fmul.d $fa5, $fa5, $fa3 @@ -7920,7 +7932,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ftintrz.w.d $fa6, $fa6 movfr2gr.s $a4, $fa6 st.w $a4, $a6, 300 - ld.d $a4, $s2, 8 + ld.d $a4, $a7, 8 movgr2fr.d $fa6, $a4 ffint.d.l $fa6, $fa6 fdiv.d $fa5, $fa5, $fa6 @@ -7964,7 +7976,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init stptr.w $a2, $a0, 5428 stptr.w $a1, $a0, 5372 stptr.w $a2, $a0, 5432 - ld.d $s7, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 96 # 8-byte Folded Reload ld.d $s1, $sp, 56 # 8-byte Folded Reload b .LBB41_19 .LBB41_15: @@ -7993,7 +8005,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init addi.w $a3, $zero, -60 .p2align 4, , 16 .LBB41_16: # =>This Inner Loop Header: Depth=1 - ld.d $a4, $s2, 8 + ld.d $a4, $a7, 8 movgr2fr.d $fa6, $a4 ffint.d.l $fa6, $fa6 fdiv.d $fa6, $fa5, $fa6 @@ -8002,7 +8014,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init movfr2gr.s $a4, $fa6 add.d $a5, $a0, $a3 stptr.w $a4, $a5, 5136 - ld.d $a4, $s2, 8 + ld.d $a4, $a7, 8 movgr2fr.d $fa6, $a4 ffint.d.l $fa6, $fa6 fdiv.d $fa6, $fa5, $fa6 @@ -8027,7 +8039,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init addi.w $a1, $zero, -60 .p2align 4, , 16 .LBB41_18: # =>This Inner Loop Header: Depth=1 - ld.d $a2, $s2, 8 + ld.d $a2, $a7, 8 movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fdiv.d $fa3, $fa0, $fa3 @@ -8036,7 +8048,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init movfr2gr.s $a2, $fa3 add.d $a3, $a0, $a1 stptr.w $a2, $a3, 5376 - ld.d $a2, $s2, 8 + ld.d $a2, $a7, 8 movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fdiv.d $fa3, $fa0, $fa3 @@ -8047,9 +8059,9 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init stptr.w $a2, $a3, 5436 bnez $a1, .LBB41_18 .LBB41_19: # %vorbis_encode_global_stereo.exit - st.d $s8, $sp, 16 # 8-byte Folded Spill - fldx.d $fs1, $s8, $s1 - ld.d $s4, $s2, 48 + st.d $s7, $sp, 16 # 8-byte Folded Spill + fldx.d $fs1, $s7, $s1 + ld.d $s4, $a7, 48 ld.d $fp, $s6, 184 ld.d $s5, $s6, 200 ld.d $s3, $s6, 216 @@ -8057,8 +8069,8 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ldptr.d $s0, $s4, 4912 blez $a0, .LBB41_62 # %bb.20: - st.d $s2, $sp, 96 # 8-byte Folded Spill - ld.d $s8, $sp, 64 # 8-byte Folded Reload + st.d $a7, $sp, 96 # 8-byte Folded Spill + ld.d $s7, $sp, 64 # 8-byte Folded Reload beqz $s0, .LBB41_63 .LBB41_21: pcalau12i $a0, %pc_hi20(_psy_info_template) @@ -8136,7 +8148,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ld.d $a0, $sp, 80 # 8-byte Folded Reload sltui $s2, $a0, 1 ld.d $s1, $sp, 96 # 8-byte Folded Reload - beq $s7, $s8, .LBB41_42 + beq $s8, $s7, .LBB41_42 # %bb.30: lu12i.w $a0, 1 ori $s3, $a0, 1440 @@ -8257,7 +8269,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ori $a1, $zero, 1 pcaddu18i $ra, %call36(vorbis_encode_tonemask_setup) jirl $ra, $ra, 0 - beq $s7, $s8, .LBB41_44 + beq $s8, $s7, .LBB41_44 # %bb.43: ori $a0, $fp, 1648 fldx.d $fa0, $s5, $a0 @@ -8294,7 +8306,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fld.d $fa4, $a3, 8 fmul.d $fa1, $fa3, $fa1 ld.d $a0, $s6, 160 - ldptr.d $a3, $a1, 4912 + ldptr.d $a4, $a1, 4912 fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 ftintrz.w.d $fa1, $fa0 @@ -8304,213 +8316,174 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fsub.d $fa0, $fa0, $fa1 fcmp.ceq.d $fcc0, $fa0, $fs0 slt $a2, $zero, $a1 - movcf2gr $a4, $fcc0 - and $a2, $a2, $a4 + movcf2gr $a3, $fcc0 + and $a2, $a2, $a3 sub.w $a1, $a1, $a2 movgr2cf $fcc0, $a2 fsel $fa0, $fa0, $fa2, $fcc0 - ori $a2, $zero, 160 - mul.d $a1, $a1, $a2 - add.d $a0, $a0, $a1 - addi.d $a1, $a3, 336 - addi.d $a4, $a0, 320 + ori $a3, $zero, 160 + mul.d $a1, $a1, $a3 + add.d $a1, $a0, $a1 fsub.d $fa1, $fa2, $fa0 - move $a6, $s1 - bgeu $a1, $a4, .LBB41_48 + addi.d $a2, $a4, 336 + addi.d $a5, $a1, 320 + pcalau12i $a0, %pc_hi20(.LCPI41_4) + move $a7, $s1 + bgeu $a2, $a5, .LBB41_48 # %bb.45: # %vector.memcheck - addi.d $a3, $a3, 496 - bgeu $a0, $a3, .LBB41_48 + addi.d $a4, $a4, 496 + bgeu $a1, $a4, .LBB41_48 # %bb.46: # %scalar.ph.preheader - move $a2, $zero - ori $a3, $zero, 160 + move $a3, $zero + ori $a4, $zero, 160 .p2align 4, , 16 .LBB41_47: # %scalar.ph # =>This Inner Loop Header: Depth=1 - ldx.w $a4, $a0, $a2 - add.d $a5, $a0, $a2 - ld.w $a5, $a5, 160 - movgr2fr.w $fa2, $a4 + ldx.w $a5, $a1, $a3 + add.d $a6, $a1, $a3 + ld.w $a6, $a6, 160 + movgr2fr.w $fa2, $a5 ffint.d.w $fa2, $fa2 fmul.d $fa2, $fa1, $fa2 - movgr2fr.w $fa3, $a5 + movgr2fr.w $fa3, $a6 ffint.d.w $fa3, $fa3 fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - fstx.s $fa2, $a1, $a2 - addi.d $a2, $a2, 4 - bne $a2, $a3, .LBB41_47 + fstx.s $fa2, $a2, $a3 + addi.d $a3, $a3, 4 + bne $a3, $a4, .LBB41_47 b .LBB41_50 .LBB41_48: # %vector.ph - move $a3, $zero + xvld $xr2, $a0, %pc_lo12(.LCPI41_4) + move $a4, $zero xvreplve0.d $xr0, $xr0 xvreplve0.d $xr1, $xr1 .p2align 4, , 16 .LBB41_49: # %vector.body # =>This Inner Loop Header: Depth=1 - xvldx $xr2, $a0, $a3 - add.d $a4, $a0, $a3 - xvpermi.q $xr3, $xr2, 1 + xvldx $xr3, $a1, $a4 + add.d $a5, $a1, $a4 + vext2xv.d.w $xr4, $xr3 + xvffint.d.l $xr4, $xr4 + xvpermi.q $xr3, $xr3, 1 vext2xv.d.w $xr3, $xr3 + xvld $xr5, $a5, 160 xvffint.d.l $xr3, $xr3 - vext2xv.d.w $xr2, $xr2 - xvld $xr4, $a4, 160 - xvffint.d.l $xr2, $xr2 - xvfmul.d $xr2, $xr1, $xr2 xvfmul.d $xr3, $xr1, $xr3 - xvpermi.q $xr5, $xr4, 1 + xvfmul.d $xr4, $xr1, $xr4 + vext2xv.d.w $xr6, $xr5 + xvffint.d.l $xr6, $xr6 + xvpermi.q $xr5, $xr5, 1 vext2xv.d.w $xr5, $xr5 xvffint.d.l $xr5, $xr5 - vext2xv.d.w $xr4, $xr4 - xvffint.d.l $xr4, $xr4 - xvfmul.d $xr4, $xr0, $xr4 xvfmul.d $xr5, $xr0, $xr5 + xvfmul.d $xr6, $xr0, $xr6 + xvfadd.d $xr4, $xr4, $xr6 xvfadd.d $xr3, $xr3, $xr5 - xvfadd.d $xr2, $xr2, $xr4 - xvpickve.d $xr4, $xr2, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr2, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr2, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr5, $vr2, 48 - xvpickve.d $xr2, $xr3, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr4, $xr3, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr2, 16 - xvpickve.d $xr2, $xr3, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 32 - xvpickve.d $xr2, $xr3, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr5, $xr4, 2 - xvstx $xr5, $a1, $a3 - addi.d $a3, $a3, 32 - bne $a3, $a2, .LBB41_49 + xvfcvt.s.d $xr3, $xr3, $xr4 + xvperm.w $xr3, $xr3, $xr2 + xvstx $xr3, $a2, $a4 + addi.d $a4, $a4, 32 + bne $a4, $a3, .LBB41_49 .LBB41_50: # %vorbis_encode_compand_setup.exit - ori $a0, $fp, 1640 - fldx.d $fa0, $s5, $a0 - ld.d $a0, $s6, 168 - ld.d $a1, $a6, 48 + ori $a1, $fp, 1640 + fldx.d $fa0, $s5, $a1 + ld.d $a1, $s6, 168 + ld.d $a2, $a7, 48 ftintrz.w.d $fa1, $fa0 - movfr2gr.s $a2, $fa1 - movgr2fr.w $fa1, $a2 + movfr2gr.s $a3, $fa1 + movgr2fr.w $fa1, $a3 ffint.d.w $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 - alsl.d $a3, $a2, $a0, 3 - slli.d $a2, $a2, 3 - fldx.d $fa1, $a0, $a2 + alsl.d $a4, $a3, $a1, 3 + slli.d $a3, $a3, 3 + fldx.d $fa1, $a1, $a3 vldi $vr2, -912 fsub.d $fa3, $fa2, $fa0 - fld.d $fa4, $a3, 8 + fld.d $fa4, $a4, 8 fmul.d $fa1, $fa3, $fa1 - ld.d $a0, $s6, 160 - ldptr.d $a3, $a1, 4920 + ld.d $a1, $s6, 160 + ldptr.d $a4, $a2, 4920 fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 ftintrz.w.d $fa1, $fa0 - movfr2gr.s $a1, $fa1 - movgr2fr.w $fa1, $a1 + movfr2gr.s $a2, $fa1 + movgr2fr.w $fa1, $a2 ffint.d.w $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 fcmp.ceq.d $fcc0, $fa0, $fs0 - slt $a2, $zero, $a1 - movcf2gr $a4, $fcc0 - and $a2, $a2, $a4 - sub.w $a1, $a1, $a2 - movgr2cf $fcc0, $a2 + slt $a3, $zero, $a2 + movcf2gr $a5, $fcc0 + and $a3, $a3, $a5 + sub.w $a2, $a2, $a3 + movgr2cf $fcc0, $a3 fsel $fa0, $fa0, $fa2, $fcc0 - ori $a2, $zero, 160 - mul.d $a1, $a1, $a2 - add.d $a0, $a0, $a1 - addi.d $a1, $a3, 336 - addi.d $a4, $a0, 320 + ori $a3, $zero, 160 + mul.d $a2, $a2, $a3 + add.d $a1, $a1, $a2 + addi.d $a2, $a4, 336 + addi.d $a5, $a1, 320 fsub.d $fa1, $fa2, $fa0 - bgeu $a1, $a4, .LBB41_54 + bgeu $a2, $a5, .LBB41_54 # %bb.51: # %vorbis_encode_compand_setup.exit - addi.d $a3, $a3, 496 - bgeu $a0, $a3, .LBB41_54 + addi.d $a4, $a4, 496 + bgeu $a1, $a4, .LBB41_54 # %bb.52: # %scalar.ph392.preheader - move $a2, $zero - ori $a3, $zero, 160 + move $a3, $zero + ori $a4, $zero, 160 .p2align 4, , 16 .LBB41_53: # %scalar.ph392 # =>This Inner Loop Header: Depth=1 - ldx.w $a4, $a0, $a2 - add.d $a5, $a0, $a2 - ld.w $a5, $a5, 160 - movgr2fr.w $fa2, $a4 + ldx.w $a5, $a1, $a3 + add.d $a6, $a1, $a3 + ld.w $a6, $a6, 160 + movgr2fr.w $fa2, $a5 ffint.d.w $fa2, $fa2 fmul.d $fa2, $fa1, $fa2 - movgr2fr.w $fa3, $a5 + movgr2fr.w $fa3, $a6 ffint.d.w $fa3, $fa3 fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - fstx.s $fa2, $a1, $a2 - addi.d $a2, $a2, 4 - bne $a2, $a3, .LBB41_53 + fstx.s $fa2, $a2, $a3 + addi.d $a3, $a3, 4 + bne $a3, $a4, .LBB41_53 b .LBB41_56 .LBB41_54: # %vector.ph393 - move $a3, $zero + xvld $xr2, $a0, %pc_lo12(.LCPI41_4) + move $a4, $zero xvreplve0.d $xr0, $xr0 xvreplve0.d $xr1, $xr1 .p2align 4, , 16 .LBB41_55: # %vector.body398 # =>This Inner Loop Header: Depth=1 - xvldx $xr2, $a0, $a3 - add.d $a4, $a0, $a3 - xvpermi.q $xr3, $xr2, 1 + xvldx $xr3, $a1, $a4 + add.d $a5, $a1, $a4 + vext2xv.d.w $xr4, $xr3 + xvffint.d.l $xr4, $xr4 + xvpermi.q $xr3, $xr3, 1 vext2xv.d.w $xr3, $xr3 + xvld $xr5, $a5, 160 xvffint.d.l $xr3, $xr3 - vext2xv.d.w $xr2, $xr2 - xvld $xr4, $a4, 160 - xvffint.d.l $xr2, $xr2 - xvfmul.d $xr2, $xr1, $xr2 xvfmul.d $xr3, $xr1, $xr3 - xvpermi.q $xr5, $xr4, 1 + xvfmul.d $xr4, $xr1, $xr4 + vext2xv.d.w $xr6, $xr5 + xvffint.d.l $xr6, $xr6 + xvpermi.q $xr5, $xr5, 1 vext2xv.d.w $xr5, $xr5 xvffint.d.l $xr5, $xr5 - vext2xv.d.w $xr4, $xr4 - xvffint.d.l $xr4, $xr4 - xvfmul.d $xr4, $xr0, $xr4 xvfmul.d $xr5, $xr0, $xr5 + xvfmul.d $xr6, $xr0, $xr6 + xvfadd.d $xr4, $xr4, $xr6 xvfadd.d $xr3, $xr3, $xr5 - xvfadd.d $xr2, $xr2, $xr4 - xvpickve.d $xr4, $xr2, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr2, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr2, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr5, $vr2, 48 - xvpickve.d $xr2, $xr3, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr4, $xr3, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr2, 16 - xvpickve.d $xr2, $xr3, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 32 - xvpickve.d $xr2, $xr3, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr5, $xr4, 2 - xvstx $xr5, $a1, $a3 - addi.d $a3, $a3, 32 - bne $a3, $a2, .LBB41_55 + xvfcvt.s.d $xr3, $xr3, $xr4 + xvperm.w $xr3, $xr3, $xr2 + xvstx $xr3, $a2, $a4 + addi.d $a4, $a4, 32 + bne $a4, $a3, .LBB41_55 .LBB41_56: # %vorbis_encode_compand_setup.exit237 - bne $s7, $s8, .LBB41_58 + bne $s8, $s7, .LBB41_58 # %bb.57: # %.critedge addi.d $a3, $s0, 8 ori $a0, $fp, 824 @@ -8518,74 +8491,74 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ori $a2, $fp, 816 b .LBB41_73 .LBB41_58: # %vector.memcheck404 - ori $a0, $fp, 1672 - fldx.d $fa0, $s5, $a0 - ld.d $a0, $s6, 176 - ld.d $a1, $a6, 48 + ori $a1, $fp, 1672 + fldx.d $fa0, $s5, $a1 + ld.d $a1, $s6, 176 + ld.d $a2, $a7, 48 ftintrz.w.d $fa1, $fa0 - movfr2gr.s $a2, $fa1 - movgr2fr.w $fa1, $a2 + movfr2gr.s $a3, $fa1 + movgr2fr.w $fa1, $a3 ffint.d.w $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 - alsl.d $a3, $a2, $a0, 3 - slli.d $a2, $a2, 3 - fldx.d $fa1, $a0, $a2 + alsl.d $a4, $a3, $a1, 3 + slli.d $a3, $a3, 3 + fldx.d $fa1, $a1, $a3 vldi $vr2, -912 fsub.d $fa3, $fa2, $fa0 - fld.d $fa4, $a3, 8 + fld.d $fa4, $a4, 8 fmul.d $fa1, $fa3, $fa1 - ld.d $a0, $s6, 160 - ldptr.d $a3, $a1, 4928 + ld.d $a1, $s6, 160 + ldptr.d $a4, $a2, 4928 fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 ftintrz.w.d $fa1, $fa0 - movfr2gr.s $a1, $fa1 - movgr2fr.w $fa1, $a1 + movfr2gr.s $a2, $fa1 + movgr2fr.w $fa1, $a2 ffint.d.w $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 fcmp.ceq.d $fcc0, $fa0, $fs0 - slt $a2, $zero, $a1 - movcf2gr $a4, $fcc0 - and $a2, $a2, $a4 - sub.w $a1, $a1, $a2 - movgr2cf $fcc0, $a2 + slt $a3, $zero, $a2 + movcf2gr $a5, $fcc0 + and $a3, $a3, $a5 + sub.w $a2, $a2, $a3 + movgr2cf $fcc0, $a3 fsel $fa0, $fa0, $fa2, $fcc0 - ori $a2, $zero, 160 - mul.d $a1, $a1, $a2 - add.d $a0, $a0, $a1 - addi.d $a1, $a3, 336 - addi.d $a4, $a0, 320 + ori $a3, $zero, 160 + mul.d $a2, $a2, $a3 + add.d $a1, $a1, $a2 + addi.d $a2, $a4, 336 + addi.d $a5, $a1, 320 fsub.d $fa1, $fa2, $fa0 - bgeu $a1, $a4, .LBB41_64 + bgeu $a2, $a5, .LBB41_64 # %bb.59: # %vector.memcheck404 - addi.d $a3, $a3, 496 - bgeu $a0, $a3, .LBB41_64 + addi.d $a4, $a4, 496 + bgeu $a1, $a4, .LBB41_64 # %bb.60: # %scalar.ph410.preheader - move $a2, $zero - ori $a3, $zero, 160 + move $a3, $zero + ori $a4, $zero, 160 .p2align 4, , 16 .LBB41_61: # %scalar.ph410 # =>This Inner Loop Header: Depth=1 - ldx.w $a4, $a0, $a2 - add.d $a5, $a0, $a2 - ld.w $a5, $a5, 160 - movgr2fr.w $fa2, $a4 + ldx.w $a5, $a1, $a3 + add.d $a6, $a1, $a3 + ld.w $a6, $a6, 160 + movgr2fr.w $fa2, $a5 ffint.d.w $fa2, $fa2 fmul.d $fa2, $fa1, $fa2 - movgr2fr.w $fa3, $a5 + movgr2fr.w $fa3, $a6 ffint.d.w $fa3, $fa3 fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - fstx.s $fa2, $a1, $a2 - addi.d $a2, $a2, 4 - bne $a2, $a3, .LBB41_61 + fstx.s $fa2, $a2, $a3 + addi.d $a3, $a3, 4 + bne $a3, $a4, .LBB41_61 b .LBB41_66 .LBB41_62: ori $a0, $zero, 1 st.w $a0, $s4, 36 - st.d $s2, $sp, 96 # 8-byte Folded Spill - ld.d $s8, $sp, 64 # 8-byte Folded Reload + st.d $a7, $sp, 96 # 8-byte Folded Spill + ld.d $s7, $sp, 64 # 8-byte Folded Reload bnez $s0, .LBB41_21 .LBB41_63: ori $a0, $zero, 1 @@ -8596,108 +8569,88 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init stptr.d $a0, $s4, 4912 b .LBB41_21 .LBB41_64: # %vector.ph411 - move $a3, $zero + xvld $xr2, $a0, %pc_lo12(.LCPI41_4) + move $a4, $zero xvreplve0.d $xr0, $xr0 xvreplve0.d $xr1, $xr1 .p2align 4, , 16 .LBB41_65: # %vector.body416 # =>This Inner Loop Header: Depth=1 - xvldx $xr2, $a0, $a3 - add.d $a4, $a0, $a3 - xvpermi.q $xr3, $xr2, 1 + xvldx $xr3, $a1, $a4 + add.d $a5, $a1, $a4 + vext2xv.d.w $xr4, $xr3 + xvffint.d.l $xr4, $xr4 + xvpermi.q $xr3, $xr3, 1 vext2xv.d.w $xr3, $xr3 + xvld $xr5, $a5, 160 xvffint.d.l $xr3, $xr3 - vext2xv.d.w $xr2, $xr2 - xvld $xr4, $a4, 160 - xvffint.d.l $xr2, $xr2 - xvfmul.d $xr2, $xr1, $xr2 xvfmul.d $xr3, $xr1, $xr3 - xvpermi.q $xr5, $xr4, 1 + xvfmul.d $xr4, $xr1, $xr4 + vext2xv.d.w $xr6, $xr5 + xvffint.d.l $xr6, $xr6 + xvpermi.q $xr5, $xr5, 1 vext2xv.d.w $xr5, $xr5 xvffint.d.l $xr5, $xr5 - vext2xv.d.w $xr4, $xr4 - xvffint.d.l $xr4, $xr4 - xvfmul.d $xr4, $xr0, $xr4 xvfmul.d $xr5, $xr0, $xr5 + xvfmul.d $xr6, $xr0, $xr6 + xvfadd.d $xr4, $xr4, $xr6 xvfadd.d $xr3, $xr3, $xr5 - xvfadd.d $xr2, $xr2, $xr4 - xvpickve.d $xr4, $xr2, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr2, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr2, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr5, $vr2, 48 - xvpickve.d $xr2, $xr3, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr4, $xr3, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr2, 16 - xvpickve.d $xr2, $xr3, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 32 - xvpickve.d $xr2, $xr3, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr5, $xr4, 2 - xvstx $xr5, $a1, $a3 - addi.d $a3, $a3, 32 - bne $a3, $a2, .LBB41_65 + xvfcvt.s.d $xr3, $xr3, $xr4 + xvperm.w $xr3, $xr3, $xr2 + xvstx $xr3, $a2, $a4 + addi.d $a4, $a4, 32 + bne $a4, $a3, .LBB41_65 .LBB41_66: # %vorbis_encode_compand_setup.exit244 - ori $a0, $fp, 1704 - fldx.d $fa0, $s5, $a0 - ld.d $a0, $s6, 176 - ld.d $a1, $a6, 48 + ori $a1, $fp, 1704 + fldx.d $fa0, $s5, $a1 + ld.d $a1, $s6, 176 + ld.d $a2, $a7, 48 ftintrz.w.d $fa1, $fa0 - movfr2gr.s $a2, $fa1 - movgr2fr.w $fa1, $a2 + movfr2gr.s $a3, $fa1 + movgr2fr.w $fa1, $a3 ffint.d.w $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 - alsl.d $a3, $a2, $a0, 3 - slli.d $a2, $a2, 3 - fldx.d $fa1, $a0, $a2 + alsl.d $a4, $a3, $a1, 3 + slli.d $a3, $a3, 3 + fldx.d $fa1, $a1, $a3 vldi $vr2, -912 fsub.d $fa3, $fa2, $fa0 - fld.d $fa4, $a3, 8 + fld.d $fa4, $a4, 8 fmul.d $fa1, $fa3, $fa1 - ld.d $a0, $s6, 160 - ldptr.d $a3, $a1, 4936 + ld.d $a1, $s6, 160 + ldptr.d $a4, $a2, 4936 fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 ftintrz.w.d $fa1, $fa0 - movfr2gr.s $a1, $fa1 - movgr2fr.w $fa1, $a1 + movfr2gr.s $a2, $fa1 + movgr2fr.w $fa1, $a2 ffint.d.w $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 fcmp.ceq.d $fcc0, $fa0, $fs0 - slt $a2, $zero, $a1 - movcf2gr $a4, $fcc0 - and $a2, $a2, $a4 - sub.w $a1, $a1, $a2 - movgr2cf $fcc0, $a2 + slt $a3, $zero, $a2 + movcf2gr $a5, $fcc0 + and $a3, $a3, $a5 + sub.w $a2, $a2, $a3 + movgr2cf $fcc0, $a3 fsel $fa0, $fa0, $fa2, $fcc0 - ori $a2, $zero, 160 - mul.d $a1, $a1, $a2 - add.d $a0, $a0, $a1 - addi.d $a1, $a3, 336 - addi.d $a4, $a0, 320 + ori $a3, $zero, 160 + mul.d $a2, $a2, $a3 + add.d $a1, $a1, $a2 + addi.d $a2, $a4, 336 + addi.d $a5, $a1, 320 fsub.d $fa1, $fa2, $fa0 - bgeu $a1, $a4, .LBB41_70 + bgeu $a2, $a5, .LBB41_70 # %bb.67: # %vorbis_encode_compand_setup.exit244 - addi.d $a3, $a3, 496 - bgeu $a0, $a3, .LBB41_70 + addi.d $a4, $a4, 496 + bgeu $a1, $a4, .LBB41_70 # %bb.68: # %scalar.ph428.preheader - move $a2, $zero + move $a0, $zero ori $a3, $zero, 160 .p2align 4, , 16 .LBB41_69: # %scalar.ph428 # =>This Inner Loop Header: Depth=1 - ldx.w $a4, $a0, $a2 - add.d $a5, $a0, $a2 + ldx.w $a4, $a1, $a0 + add.d $a5, $a1, $a0 ld.w $a5, $a5, 160 movgr2fr.w $fa2, $a4 ffint.d.w $fa2, $fa2 @@ -8707,66 +8660,46 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - fstx.s $fa2, $a1, $a2 - addi.d $a2, $a2, 4 - bne $a2, $a3, .LBB41_69 + fstx.s $fa2, $a2, $a0 + addi.d $a0, $a0, 4 + bne $a0, $a3, .LBB41_69 b .LBB41_72 .LBB41_70: # %vector.ph429 - move $a3, $zero + xvld $xr2, $a0, %pc_lo12(.LCPI41_4) + move $a0, $zero xvreplve0.d $xr0, $xr0 xvreplve0.d $xr1, $xr1 .p2align 4, , 16 .LBB41_71: # %vector.body434 # =>This Inner Loop Header: Depth=1 - xvldx $xr2, $a0, $a3 - add.d $a4, $a0, $a3 - xvpermi.q $xr3, $xr2, 1 + xvldx $xr3, $a1, $a0 + add.d $a4, $a1, $a0 + vext2xv.d.w $xr4, $xr3 + xvffint.d.l $xr4, $xr4 + xvpermi.q $xr3, $xr3, 1 vext2xv.d.w $xr3, $xr3 + xvld $xr5, $a4, 160 xvffint.d.l $xr3, $xr3 - vext2xv.d.w $xr2, $xr2 - xvld $xr4, $a4, 160 - xvffint.d.l $xr2, $xr2 - xvfmul.d $xr2, $xr1, $xr2 xvfmul.d $xr3, $xr1, $xr3 - xvpermi.q $xr5, $xr4, 1 + xvfmul.d $xr4, $xr1, $xr4 + vext2xv.d.w $xr6, $xr5 + xvffint.d.l $xr6, $xr6 + xvpermi.q $xr5, $xr5, 1 vext2xv.d.w $xr5, $xr5 xvffint.d.l $xr5, $xr5 - vext2xv.d.w $xr4, $xr4 - xvffint.d.l $xr4, $xr4 - xvfmul.d $xr4, $xr0, $xr4 xvfmul.d $xr5, $xr0, $xr5 + xvfmul.d $xr6, $xr0, $xr6 + xvfadd.d $xr4, $xr4, $xr6 xvfadd.d $xr3, $xr3, $xr5 - xvfadd.d $xr2, $xr2, $xr4 - xvpickve.d $xr4, $xr2, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr2, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr2, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr5, $vr2, 48 - xvpickve.d $xr2, $xr3, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr4, $xr3, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr2, 16 - xvpickve.d $xr2, $xr3, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 32 - xvpickve.d $xr2, $xr3, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr5, $xr4, 2 - xvstx $xr5, $a1, $a3 - addi.d $a3, $a3, 32 - bne $a3, $a2, .LBB41_71 + xvfcvt.s.d $xr3, $xr3, $xr4 + xvperm.w $xr3, $xr3, $xr2 + xvstx $xr3, $a2, $a0 + addi.d $a0, $a0, 32 + bne $a0, $a3, .LBB41_71 .LBB41_72: # %vorbis_encode_compand_setup.exit251 fld.d $fa0, $s0, 8 ld.d $a0, $s6, 80 - ld.d $a1, $a6, 48 + ld.d $a1, $a7, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 movgr2fr.w $fa1, $a2 @@ -8791,7 +8724,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ori $a0, $fp, 1624 fldx.d $fa0, $s5, $a0 ld.d $a0, $s6, 80 - ld.d $a1, $a6, 48 + ld.d $a1, $a7, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 movgr2fr.w $fa1, $a2 @@ -8820,7 +8753,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_73: fld.d $fa0, $a3, 0 ld.d $a3, $s6, 80 - ld.d $a4, $a6, 48 + ld.d $a4, $a7, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a5, $fa1 movgr2fr.w $fa1, $a5 @@ -8844,7 +8777,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fst.s $fa0, $a2, 32 fldx.d $fa0, $s5, $a1 ld.d $a1, $s6, 80 - ld.d $a2, $a6, 48 + ld.d $a2, $a7, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a3, $fa1 movgr2fr.w $fa1, $a3 @@ -8875,10 +8808,10 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ori $a0, $fp, 1456 fldx.d $fs0, $s5, $a0 .LBB41_75: - ld.d $a0, $a6, 48 + ld.d $a0, $a7, 48 move $a1, $zero fmov.d $fa1, $fs0 - move $s0, $a6 + move $s0, $a7 pcaddu18i $ra, %call36(vorbis_encode_noisebias_setup) jirl $ra, $ra, 0 ori $a0, $fp, 1632 @@ -8892,7 +8825,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ori $s1, $zero, 1 pcaddu18i $ra, %call36(vorbis_encode_noisebias_setup) jirl $ra, $ra, 0 - bne $s7, $s8, .LBB41_77 + bne $s8, $s7, .LBB41_77 # %bb.76: ori $a0, $fp, 824 ori $a1, $fp, 816 @@ -9015,14 +8948,14 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ori $a1, $zero, 3208 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - slli.d $s3, $s6, 3 - stx.d $a0, $s0, $s3 + slli.d $s5, $s6, 3 + stx.d $a0, $s0, $s5 ori $a0, $zero, 1 ori $a1, $zero, 16 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 ld.d $a1, $sp, 48 # 8-byte Folded Reload - stx.d $a0, $a1, $s3 + stx.d $a0, $a1, $s5 slli.d $a1, $s6, 4 ld.d $a2, $sp, 32 # 8-byte Folded Reload vldx $vr0, $a2, $a1 @@ -9035,10 +8968,10 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_82: # in Loop: Header=BB41_80 Depth=1 st.d $s1, $sp, 80 # 8-byte Folded Spill alsl.d $a0, $s6, $s0, 3 - slli.d $s5, $s6, 2 + slli.d $s3, $s6, 2 ld.d $a0, $a0, 0 ld.d $a1, $sp, 40 # 8-byte Folded Reload - stx.w $zero, $a1, $s5 + stx.w $zero, $a1, $s3 ori $a1, $zero, 3208 mul.d $a1, $s6, $a1 ld.d $a2, $sp, 64 # 8-byte Folded Reload @@ -9063,7 +8996,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .p2align 4, , 16 .LBB41_86: # %vorbis_encode_residue_setup.exit.i # in Loop: Header=BB41_87 Depth=2 - ldx.w $a0, $a0, $s5 + ldx.w $a0, $a0, $s3 addi.d $a0, $a0, -2 sltui $a0, $a0, 1 fdiv.d $fa0, $fa3, $fa0 @@ -9072,8 +9005,8 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init movgr2cf $fcc0, $a0 ld.w $a0, $s1, 16 fsel $fa0, $fa0, $fa1, $fcc0 - pcalau12i $a1, %pc_hi20(.LCPI41_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI41_4) + pcalau12i $a1, %pc_hi20(.LCPI41_5) + fld.d $fa1, $a1, %pc_lo12(.LCPI41_5) movgr2fr.w $fa2, $a0 ffint.d.w $fa2, $fa2 fdiv.d $fa0, $fa0, $fa2 @@ -9123,7 +9056,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init addi.d $a0, $fp, 1 st.w $a0, $s4, 28 .LBB41_89: # in Loop: Header=BB41_87 Depth=2 - ldx.d $a1, $s4, $s3 + ldx.d $a1, $s4, $s5 ori $a0, $zero, 16 ori $a2, $zero, 64 beq $a1, $a2, .LBB41_93 @@ -9722,8 +9655,8 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fstx.d $fa0, $a3, $a2 movgr2fr.d $fa0, $a1 ffint.d.l $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI41_5) - vld $vr1, $a1, %pc_lo12(.LCPI41_5) + pcalau12i $a1, %pc_hi20(.LCPI41_6) + vld $vr1, $a1, %pc_lo12(.LCPI41_6) ori $a1, $fp, 1392 fstx.d $fa0, $a3, $a1 ori $a1, $fp, 1400 @@ -33303,49 +33236,27 @@ vorbis_lpc_from_data: # @vorbis_lpc_from_data movgr2fr.w $fa0, $zero b .LBB176_28 .LBB176_20: # %.lr.ph110.preheader - ori $a0, $zero, 8 + ori $a0, $zero, 4 bgeu $a3, $a0, .LBB176_22 # %bb.21: move $a0, $zero b .LBB176_25 .LBB176_22: # %vector.ph - bstrpick.d $a0, $a3, 30, 3 - slli.d $a0, $a0, 3 - addi.d $a2, $a1, 16 - addi.d $a5, $a4, 32 + bstrpick.d $a0, $a3, 30, 2 + slli.d $a0, $a0, 2 + addi.d $a2, $a1, 8 + addi.d $a5, $a4, 16 move $a6, $a0 .p2align 4, , 16 .LBB176_23: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr1, $a5, -32 - xvld $xr2, $a5, 0 - xvpickve.d $xr3, $xr1, 1 - fcvt.s.d $fa3, $fa3 - xvpickve.d $xr4, $xr1, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr3, 16 - xvpickve.d $xr3, $xr1, 2 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr4, $vr3, 32 - xvpickve.d $xr1, $xr1, 3 - fcvt.s.d $fa1, $fa1 - vextrins.w $vr4, $vr1, 48 - xvpickve.d $xr1, $xr2, 1 - fcvt.s.d $fa1, $fa1 - xvpickve.d $xr3, $xr2, 0 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr3, $vr1, 16 - xvpickve.d $xr1, $xr2, 2 - fcvt.s.d $fa1, $fa1 - vextrins.w $vr3, $vr1, 32 - xvpickve.d $xr1, $xr2, 3 - fcvt.s.d $fa1, $fa1 - vextrins.w $vr3, $vr1, 48 - vst $vr4, $a2, -16 - vst $vr3, $a2, 0 - addi.d $a6, $a6, -8 - addi.d $a2, $a2, 32 - addi.d $a5, $a5, 64 + vld $vr1, $a5, -16 + vld $vr2, $a5, 0 + vfcvt.s.d $vr1, $vr2, $vr1 + vst $vr1, $a2, -8 + addi.d $a6, $a6, -4 + addi.d $a2, $a2, 16 + addi.d $a5, $a5, 32 bnez $a6, .LBB176_23 # %bb.24: # %middle.block beq $a0, $a3, .LBB176_27 @@ -34938,18 +34849,9 @@ _book_unquantize: # @_book_unquantize xvfmul.d $xr7, $xr2, $xr7 xvfadd.d $xr7, $xr7, $xr3 xvfadd.d $xr7, $xr7, $xr5 - xvpickve.d $xr8, $xr7, 1 - fcvt.s.d $ft0, $ft0 - xvpickve.d $xr9, $xr7, 0 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr9, $vr8, 16 - xvpickve.d $xr8, $xr7, 2 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr9, $vr8, 32 - xvpickve.d $xr7, $xr7, 3 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr9, $vr7, 48 - vst $vr9, $t3, 0 + xvpermi.q $xr8, $xr7, 1 + vfcvt.s.d $vr7, $vr8, $vr7 + vst $vr7, $t3, 0 addi.d $t6, $t6, -4 addi.d $t3, $t3, 16 addi.d $t5, $t5, 32 @@ -35011,18 +34913,9 @@ _book_unquantize: # @_book_unquantize xvfmul.d $xr7, $xr2, $xr7 xvfadd.d $xr7, $xr7, $xr3 xvfadd.d $xr7, $xr7, $xr5 - xvpickve.d $xr8, $xr7, 1 - fcvt.s.d $ft0, $ft0 - xvpickve.d $xr9, $xr7, 0 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr9, $vr8, 16 - xvpickve.d $xr8, $xr7, 2 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr9, $vr8, 32 - xvpickve.d $xr7, $xr7, 3 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr9, $vr7, 48 - vst $vr9, $t3, 0 + xvpermi.q $xr8, $xr7, 1 + vfcvt.s.d $vr7, $vr8, $vr7 + vst $vr7, $t3, 0 addi.d $t5, $t5, -4 addi.d $t3, $t3, 16 addi.d $t4, $t4, 32 @@ -63934,49 +63827,27 @@ Newton_Raphson: # @Newton_Raphson addi.w $a4, $a4, 1 bcnez $fcc0, .LBB250_9 # %bb.15: # %.lr.ph67.preheader - ori $a0, $zero, 8 + ori $a0, $zero, 4 bgeu $a1, $a0, .LBB250_17 # %bb.16: move $a0, $zero b .LBB250_20 .LBB250_17: # %vector.ph7 - bstrpick.d $a0, $a1, 30, 3 - slli.d $a0, $a0, 3 - addi.d $a4, $a2, 16 - addi.d $a5, $a3, 32 + bstrpick.d $a0, $a1, 30, 2 + slli.d $a0, $a0, 2 + addi.d $a4, $a2, 8 + addi.d $a5, $a3, 16 move $a6, $a0 .p2align 4, , 16 .LBB250_18: # %vector.body10 # =>This Inner Loop Header: Depth=1 - xvld $xr0, $a5, -32 - xvld $xr1, $a5, 0 - xvpickve.d $xr2, $xr0, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr3, $xr0, 0 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr3, $vr2, 16 - xvpickve.d $xr2, $xr0, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr3, $vr2, 32 - xvpickve.d $xr0, $xr0, 3 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr3, $vr0, 48 - xvpickve.d $xr0, $xr1, 1 - fcvt.s.d $fa0, $fa0 - xvpickve.d $xr2, $xr1, 0 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr2, $vr0, 16 - xvpickve.d $xr0, $xr1, 2 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr2, $vr0, 32 - xvpickve.d $xr0, $xr1, 3 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr2, $vr0, 48 - vst $vr3, $a4, -16 - vst $vr2, $a4, 0 - addi.d $a6, $a6, -8 - addi.d $a4, $a4, 32 - addi.d $a5, $a5, 64 + vld $vr0, $a5, -16 + vld $vr1, $a5, 0 + vfcvt.s.d $vr0, $vr1, $vr0 + vst $vr0, $a4, -8 + addi.d $a6, $a6, -4 + addi.d $a4, $a4, 16 + addi.d $a5, $a5, 32 bnez $a6, .LBB250_18 # %bb.19: # %middle.block15 beq $a0, $a1, .LBB250_22 diff --git a/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s b/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s index 2e0a9f8..30c8bd8 100644 --- a/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s +++ b/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s @@ -249,9 +249,18 @@ main: # @main .word 5 # 0x5 .word 6 # 0x6 .word 7 # 0x7 +.LCPI1_1: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 .section .rodata.cst8,"aM",@progbits,8 .p2align 3, 0x0 -.LCPI1_1: +.LCPI1_2: .dword 0x47d2ced32a16a1b1 # double 9.9999999999999997E+37 .text .globl matgen @@ -1301,13 +1310,15 @@ matgen: # @matgen bstrpick.d $a4, $a2, 30, 3 slli.d $a3, $a4, 3 slli.d $a4, $a4, 5 - pcalau12i $a5, %pc_hi20(.LCPI1_1) - fld.d $fa1, $a5, %pc_lo12(.LCPI1_1) + pcalau12i $a5, %pc_hi20(.LCPI1_2) + fld.d $fa1, $a5, %pc_lo12(.LCPI1_2) pcalau12i $a5, %pc_hi20(.LCPI1_0) xvld $xr2, $a5, %pc_lo12(.LCPI1_0) - xvreplve0.d $xr3, $xr0 + pcalau12i $a5, %pc_hi20(.LCPI1_1) + xvld $xr3, $a5, %pc_lo12(.LCPI1_1) + xvreplve0.d $xr4, $xr0 ori $a5, $zero, 8 - xvreplgr2vr.d $xr4, $s5 + xvreplgr2vr.d $xr5, $s5 b .LBB1_110 .LBB1_109: # %._crit_edge469 # in Loop: Header=BB1_110 Depth=1 @@ -1323,14 +1334,14 @@ matgen: # @matgen srli.d $a7, $a6, 1 andi $t1, $a6, 1 or $a7, $t1, $a7 - movgr2fr.d $fa5, $a7 - ffint.s.l $fa5, $fa5 - fadd.s $fa5, $fa5, $fa5 - slti $a7, $a6, 0 - movgr2fr.d $fa6, $a6 + movgr2fr.d $fa6, $a7 ffint.s.l $fa6, $fa6 + fadd.s $fa6, $fa6, $fa6 + slti $a7, $a6, 0 + movgr2fr.d $fa7, $a6 + ffint.s.l $fa7, $fa7 movgr2cf $fcc0, $a7 - fsel $fa5, $fa6, $fa5, $fcc0 + fsel $fa6, $fa7, $fa6, $fcc0 bge $s3, $a5, .LBB1_112 # %bb.111: # in Loop: Header=BB1_110 Depth=1 move $t1, $zero @@ -1339,16 +1350,29 @@ matgen: # @matgen .LBB1_112: # %vector.ph621 # in Loop: Header=BB1_110 Depth=1 add.d $a7, $t0, $a4 - xvreplgr2vr.d $xr6, $a0 - xvreplgr2vr.w $xr7, $a6 - xvreplve0.w $xr8, $xr5 + xvreplgr2vr.d $xr7, $a0 + xvreplgr2vr.w $xr8, $a6 + xvreplve0.w $xr9, $xr6 move $t1, $a3 - xvori.b $xr9, $xr2, 0 + xvori.b $xr10, $xr2, 0 .p2align 4, , 16 .LBB1_113: # %vector.body632 # Parent Loop BB1_110 Depth=1 # => This Inner Loop Header: Depth=2 - xvpermi.q $xr10, $xr9, 1 + xvpermi.q $xr11, $xr10, 1 + vpickve2gr.w $t2, $vr11, 2 + bstrpick.d $t2, $t2, 31, 0 + vinsgr2vr.d $vr12, $t2, 0 + vpickve2gr.w $t2, $vr11, 3 + bstrpick.d $t2, $t2, 31, 0 + vinsgr2vr.d $vr12, $t2, 1 + vpickve2gr.w $t2, $vr11, 0 + bstrpick.d $t2, $t2, 31, 0 + vinsgr2vr.d $vr13, $t2, 0 + vpickve2gr.w $t2, $vr11, 1 + bstrpick.d $t2, $t2, 31, 0 + vinsgr2vr.d $vr13, $t2, 1 + xvpermi.q $xr13, $xr12, 2 vpickve2gr.w $t2, $vr10, 2 bstrpick.d $t2, $t2, 31, 0 vinsgr2vr.d $vr11, $t2, 0 @@ -1362,43 +1386,29 @@ matgen: # @matgen bstrpick.d $t2, $t2, 31, 0 vinsgr2vr.d $vr12, $t2, 1 xvpermi.q $xr12, $xr11, 2 - vpickve2gr.w $t2, $vr9, 2 - bstrpick.d $t2, $t2, 31, 0 - vinsgr2vr.d $vr10, $t2, 0 - vpickve2gr.w $t2, $vr9, 3 - bstrpick.d $t2, $t2, 31, 0 - vinsgr2vr.d $vr10, $t2, 1 - vpickve2gr.w $t2, $vr9, 0 - bstrpick.d $t2, $t2, 31, 0 - vinsgr2vr.d $vr11, $t2, 0 - vpickve2gr.w $t2, $vr9, 1 - bstrpick.d $t2, $t2, 31, 0 - vinsgr2vr.d $vr11, $t2, 1 - xvpermi.q $xr11, $xr10, 2 - xvslt.du $xr10, $xr6, $xr12 - xvpickve2gr.d $t2, $xr10, 0 + xvslt.du $xr11, $xr7, $xr13 + xvpickve2gr.d $t2, $xr11, 0 + vinsgr2vr.w $vr13, $t2, 0 + xvpickve2gr.d $t2, $xr11, 1 + vinsgr2vr.w $vr13, $t2, 1 + xvpickve2gr.d $t2, $xr11, 2 + vinsgr2vr.w $vr13, $t2, 2 + xvpickve2gr.d $t2, $xr11, 3 + vinsgr2vr.w $vr13, $t2, 3 + xvslt.du $xr11, $xr7, $xr12 + xvpickve2gr.d $t2, $xr11, 0 vinsgr2vr.w $vr12, $t2, 0 - xvpickve2gr.d $t2, $xr10, 1 + xvpickve2gr.d $t2, $xr11, 1 vinsgr2vr.w $vr12, $t2, 1 - xvpickve2gr.d $t2, $xr10, 2 + xvpickve2gr.d $t2, $xr11, 2 vinsgr2vr.w $vr12, $t2, 2 - xvpickve2gr.d $t2, $xr10, 3 + xvpickve2gr.d $t2, $xr11, 3 vinsgr2vr.w $vr12, $t2, 3 - xvslt.du $xr10, $xr6, $xr11 - xvpickve2gr.d $t2, $xr10, 0 - vinsgr2vr.w $vr11, $t2, 0 - xvpickve2gr.d $t2, $xr10, 1 - vinsgr2vr.w $vr11, $t2, 1 - xvpickve2gr.d $t2, $xr10, 2 - vinsgr2vr.w $vr11, $t2, 2 - xvpickve2gr.d $t2, $xr10, 3 - vinsgr2vr.w $vr11, $t2, 3 - xvpermi.q $xr11, $xr12, 2 - xvaddi.wu $xr10, $xr9, 1 - xvbitsel.v $xr10, $xr7, $xr10, $xr11 - xvffint.s.wu $xr10, $xr10 - xvfdiv.s $xr10, $xr8, $xr10 - xvpermi.q $xr11, $xr10, 1 + xvpermi.q $xr12, $xr13, 2 + xvaddi.wu $xr11, $xr10, 1 + xvbitsel.v $xr11, $xr8, $xr11, $xr12 + xvffint.s.wu $xr11, $xr11 + xvfdiv.s $xr11, $xr9, $xr11 vreplvei.w $vr12, $vr11, 3 fcvt.d.s $ft4, $ft4 vreplvei.w $vr13, $vr11, 2 @@ -1406,50 +1416,30 @@ matgen: # @matgen vextrins.d $vr13, $vr12, 16 vreplvei.w $vr12, $vr11, 1 fcvt.d.s $ft4, $ft4 - vreplvei.w $vr11, $vr11, 0 - fcvt.d.s $ft3, $ft3 - vextrins.d $vr11, $vr12, 16 - xvpermi.q $xr11, $xr13, 2 - vreplvei.w $vr12, $vr10, 3 + vreplvei.w $vr14, $vr11, 0 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr12, 16 + xvpermi.q $xr14, $xr13, 2 + xvpermi.q $xr11, $xr11, 1 + vreplvei.w $vr12, $vr11, 3 fcvt.d.s $ft4, $ft4 - vreplvei.w $vr13, $vr10, 2 + vreplvei.w $vr13, $vr11, 2 fcvt.d.s $ft5, $ft5 vextrins.d $vr13, $vr12, 16 - vreplvei.w $vr12, $vr10, 1 + vreplvei.w $vr12, $vr11, 1 fcvt.d.s $ft4, $ft4 - vreplvei.w $vr10, $vr10, 0 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr12, 16 - xvpermi.q $xr10, $xr13, 2 - xvfmul.d $xr10, $xr10, $xr4 - xvfmul.d $xr11, $xr11, $xr4 - xvfdiv.d $xr11, $xr11, $xr3 - xvfdiv.d $xr10, $xr10, $xr3 - xvpickve.d $xr12, $xr10, 1 - fcvt.s.d $ft4, $ft4 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr12, 16 - xvpickve.d $xr12, $xr10, 2 - fcvt.s.d $ft4, $ft4 - vextrins.w $vr13, $vr12, 32 - xvpickve.d $xr10, $xr10, 3 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr13, $vr10, 48 - xvpickve.d $xr10, $xr11, 1 - fcvt.s.d $ft2, $ft2 - xvpickve.d $xr12, $xr11, 0 - fcvt.s.d $ft4, $ft4 - vextrins.w $vr12, $vr10, 16 - xvpickve.d $xr10, $xr11, 2 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr12, $vr10, 32 - xvpickve.d $xr10, $xr11, 3 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr12, $vr10, 48 - xvpermi.q $xr13, $xr12, 2 - xvst $xr13, $t0, 0 - xvaddi.wu $xr9, $xr9, 8 + vreplvei.w $vr11, $vr11, 0 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfmul.d $xr11, $xr11, $xr5 + xvfmul.d $xr12, $xr14, $xr5 + xvfdiv.d $xr12, $xr12, $xr4 + xvfdiv.d $xr11, $xr11, $xr4 + xvfcvt.s.d $xr11, $xr11, $xr12 + xvperm.w $xr11, $xr11, $xr3 + xvst $xr11, $t0, 0 + xvaddi.wu $xr10, $xr10, 8 addi.d $t1, $t1, -8 addi.d $t0, $t0, 32 bnez $t1, .LBB1_113 @@ -1470,14 +1460,14 @@ matgen: # @matgen maskeqz $t2, $t1, $t2 or $t2, $t2, $t3 bstrpick.d $t2, $t2, 31, 0 - movgr2fr.d $fa6, $t2 - ffint.s.l $fa6, $fa6 - fdiv.s $fa6, $fa5, $fa6 - fcvt.d.s $fa6, $fa6 - fmul.d $fa6, $fa6, $fa1 - fdiv.d $fa6, $fa6, $fa0 - fcvt.s.d $fa6, $fa6 - fst.s $fa6, $a7, 0 + movgr2fr.d $fa7, $t2 + ffint.s.l $fa7, $fa7 + fdiv.s $fa7, $fa6, $fa7 + fcvt.d.s $fa7, $fa7 + fmul.d $fa7, $fa7, $fa1 + fdiv.d $fa7, $fa7, $fa0 + fcvt.s.d $fa7, $fa7 + fst.s $fa7, $a7, 0 addi.d $a7, $a7, 4 addi.d $t0, $t0, 1 bne $a2, $t1, .LBB1_116 @@ -1707,15 +1697,17 @@ matgen: # @matgen slli.d $a3, $a4, 3 slli.d $a4, $a4, 5 xvreplve0.w $xr1, $xr0 - pcalau12i $a5, %pc_hi20(.LCPI1_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI1_1) + pcalau12i $a5, %pc_hi20(.LCPI1_2) + fld.d $fa2, $a5, %pc_lo12(.LCPI1_2) pcalau12i $a5, %pc_hi20(.LCPI1_0) xvld $xr3, $a5, %pc_lo12(.LCPI1_0) + pcalau12i $a5, %pc_hi20(.LCPI1_1) + xvld $xr4, $a5, %pc_lo12(.LCPI1_1) lu12i.w $a5, 172394 ori $a5, $a5, 433 lu32i.d $a5, 184019 lu52i.d $a5, $a5, 1149 - xvreplgr2vr.d $xr4, $a5 + xvreplgr2vr.d $xr5, $a5 ori $a5, $zero, 8 b .LBB1_147 .LBB1_146: # %._crit_edge461 @@ -1732,14 +1724,14 @@ matgen: # @matgen srli.d $a7, $a6, 1 andi $t1, $a6, 1 or $a7, $t1, $a7 - movgr2fr.d $fa5, $a7 - ffint.s.l $fa5, $fa5 - fadd.s $fa5, $fa5, $fa5 - slti $a7, $a6, 0 - movgr2fr.d $fa6, $a6 + movgr2fr.d $fa6, $a7 ffint.s.l $fa6, $fa6 + fadd.s $fa6, $fa6, $fa6 + slti $a7, $a6, 0 + movgr2fr.d $fa7, $a6 + ffint.s.l $fa7, $fa7 movgr2cf $fcc0, $a7 - fsel $fa5, $fa6, $fa5, $fcc0 + fsel $fa6, $fa7, $fa6, $fcc0 bge $s3, $a5, .LBB1_149 # %bb.148: # in Loop: Header=BB1_147 Depth=1 move $t1, $zero @@ -1748,16 +1740,29 @@ matgen: # @matgen .LBB1_149: # %vector.ph # in Loop: Header=BB1_147 Depth=1 add.d $a7, $t0, $a4 - xvreplgr2vr.d $xr6, $a0 - xvreplgr2vr.w $xr7, $a6 - xvreplve0.w $xr8, $xr5 + xvreplgr2vr.d $xr7, $a0 + xvreplgr2vr.w $xr8, $a6 + xvreplve0.w $xr9, $xr6 move $t1, $a3 - xvori.b $xr9, $xr3, 0 + xvori.b $xr10, $xr3, 0 .p2align 4, , 16 .LBB1_150: # %vector.body # Parent Loop BB1_147 Depth=1 # => This Inner Loop Header: Depth=2 - xvpermi.q $xr10, $xr9, 1 + xvpermi.q $xr11, $xr10, 1 + vpickve2gr.w $t2, $vr11, 2 + bstrpick.d $t2, $t2, 31, 0 + vinsgr2vr.d $vr12, $t2, 0 + vpickve2gr.w $t2, $vr11, 3 + bstrpick.d $t2, $t2, 31, 0 + vinsgr2vr.d $vr12, $t2, 1 + vpickve2gr.w $t2, $vr11, 0 + bstrpick.d $t2, $t2, 31, 0 + vinsgr2vr.d $vr13, $t2, 0 + vpickve2gr.w $t2, $vr11, 1 + bstrpick.d $t2, $t2, 31, 0 + vinsgr2vr.d $vr13, $t2, 1 + xvpermi.q $xr13, $xr12, 2 vpickve2gr.w $t2, $vr10, 2 bstrpick.d $t2, $t2, 31, 0 vinsgr2vr.d $vr11, $t2, 0 @@ -1771,93 +1776,59 @@ matgen: # @matgen bstrpick.d $t2, $t2, 31, 0 vinsgr2vr.d $vr12, $t2, 1 xvpermi.q $xr12, $xr11, 2 - vpickve2gr.w $t2, $vr9, 2 - bstrpick.d $t2, $t2, 31, 0 - vinsgr2vr.d $vr10, $t2, 0 - vpickve2gr.w $t2, $vr9, 3 - bstrpick.d $t2, $t2, 31, 0 - vinsgr2vr.d $vr10, $t2, 1 - vpickve2gr.w $t2, $vr9, 0 - bstrpick.d $t2, $t2, 31, 0 - vinsgr2vr.d $vr11, $t2, 0 - vpickve2gr.w $t2, $vr9, 1 - bstrpick.d $t2, $t2, 31, 0 - vinsgr2vr.d $vr11, $t2, 1 - xvpermi.q $xr11, $xr10, 2 - xvslt.du $xr10, $xr6, $xr12 - xvpickve2gr.d $t2, $xr10, 0 + xvslt.du $xr11, $xr7, $xr13 + xvpickve2gr.d $t2, $xr11, 0 + vinsgr2vr.w $vr13, $t2, 0 + xvpickve2gr.d $t2, $xr11, 1 + vinsgr2vr.w $vr13, $t2, 1 + xvpickve2gr.d $t2, $xr11, 2 + vinsgr2vr.w $vr13, $t2, 2 + xvpickve2gr.d $t2, $xr11, 3 + vinsgr2vr.w $vr13, $t2, 3 + xvslt.du $xr11, $xr7, $xr12 + xvpickve2gr.d $t2, $xr11, 0 vinsgr2vr.w $vr12, $t2, 0 - xvpickve2gr.d $t2, $xr10, 1 + xvpickve2gr.d $t2, $xr11, 1 vinsgr2vr.w $vr12, $t2, 1 - xvpickve2gr.d $t2, $xr10, 2 + xvpickve2gr.d $t2, $xr11, 2 vinsgr2vr.w $vr12, $t2, 2 - xvpickve2gr.d $t2, $xr10, 3 + xvpickve2gr.d $t2, $xr11, 3 vinsgr2vr.w $vr12, $t2, 3 - xvslt.du $xr10, $xr6, $xr11 - xvpickve2gr.d $t2, $xr10, 0 - vinsgr2vr.w $vr11, $t2, 0 - xvpickve2gr.d $t2, $xr10, 1 - vinsgr2vr.w $vr11, $t2, 1 - xvpickve2gr.d $t2, $xr10, 2 - vinsgr2vr.w $vr11, $t2, 2 - xvpickve2gr.d $t2, $xr10, 3 - vinsgr2vr.w $vr11, $t2, 3 - xvpermi.q $xr11, $xr12, 2 - xvaddi.wu $xr10, $xr9, 1 - xvbitsel.v $xr10, $xr7, $xr10, $xr11 - xvffint.s.wu $xr10, $xr10 - xvfdiv.s $xr10, $xr10, $xr8 - xvfmul.s $xr10, $xr10, $xr1 - vreplvei.w $vr11, $vr10, 3 - fcvt.d.s $ft3, $ft3 - vreplvei.w $vr12, $vr10, 2 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr11, 16 - vreplvei.w $vr11, $vr10, 1 - fcvt.d.s $ft3, $ft3 - vreplvei.w $vr13, $vr10, 0 + xvpermi.q $xr12, $xr13, 2 + xvaddi.wu $xr11, $xr10, 1 + xvbitsel.v $xr11, $xr8, $xr11, $xr12 + xvffint.s.wu $xr11, $xr11 + xvfdiv.s $xr11, $xr11, $xr9 + xvfmul.s $xr11, $xr11, $xr1 + xvpermi.q $xr12, $xr11, 1 + vreplvei.w $vr13, $vr12, 3 fcvt.d.s $ft5, $ft5 - vextrins.d $vr13, $vr11, 16 - xvpermi.q $xr13, $xr12, 2 - xvpermi.q $xr10, $xr10, 1 - vreplvei.w $vr11, $vr10, 3 - fcvt.d.s $ft3, $ft3 - vreplvei.w $vr12, $vr10, 2 + vreplvei.w $vr14, $vr12, 2 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr13, 16 + vreplvei.w $vr13, $vr12, 1 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr12, $vr12, 0 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr11, 16 - vreplvei.w $vr11, $vr10, 1 + vextrins.d $vr12, $vr13, 16 + xvpermi.q $xr12, $xr14, 2 + vreplvei.w $vr13, $vr11, 3 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr14, $vr11, 2 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr13, 16 + vreplvei.w $vr13, $vr11, 1 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr11, $vr11, 0 fcvt.d.s $ft3, $ft3 - vreplvei.w $vr10, $vr10, 0 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr11, 16 - xvpermi.q $xr10, $xr12, 2 - xvfdiv.d $xr10, $xr10, $xr4 - xvfdiv.d $xr11, $xr13, $xr4 - xvpickve.d $xr12, $xr11, 1 - fcvt.s.d $ft4, $ft4 - xvpickve.d $xr13, $xr11, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr12, 16 - xvpickve.d $xr12, $xr11, 2 - fcvt.s.d $ft4, $ft4 - vextrins.w $vr13, $vr12, 32 - xvpickve.d $xr11, $xr11, 3 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 48 - xvpickve.d $xr11, $xr10, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr12, $xr10, 0 - fcvt.s.d $ft4, $ft4 - vextrins.w $vr12, $vr11, 16 - xvpickve.d $xr11, $xr10, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr12, $vr11, 32 - xvpickve.d $xr10, $xr10, 3 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr12, $vr10, 48 - xvpermi.q $xr13, $xr12, 2 - xvst $xr13, $t0, 0 - xvaddi.wu $xr9, $xr9, 8 + vextrins.d $vr11, $vr13, 16 + xvpermi.q $xr11, $xr14, 2 + xvfdiv.d $xr11, $xr11, $xr5 + xvfdiv.d $xr12, $xr12, $xr5 + xvfcvt.s.d $xr11, $xr12, $xr11 + xvperm.w $xr11, $xr11, $xr4 + xvst $xr11, $t0, 0 + xvaddi.wu $xr10, $xr10, 8 addi.d $t1, $t1, -8 addi.d $t0, $t0, 32 bnez $t1, .LBB1_150 @@ -1878,14 +1849,14 @@ matgen: # @matgen maskeqz $t2, $t1, $t2 or $t2, $t2, $t3 bstrpick.d $t2, $t2, 31, 0 - movgr2fr.d $fa6, $t2 - ffint.s.l $fa6, $fa6 - fdiv.s $fa6, $fa6, $fa5 - fmul.s $fa6, $fa6, $fa0 - fcvt.d.s $fa6, $fa6 - fdiv.d $fa6, $fa6, $fa2 - fcvt.s.d $fa6, $fa6 - fst.s $fa6, $a7, 0 + movgr2fr.d $fa7, $t2 + ffint.s.l $fa7, $fa7 + fdiv.s $fa7, $fa7, $fa6 + fmul.s $fa7, $fa7, $fa0 + fcvt.d.s $fa7, $fa7 + fdiv.d $fa7, $fa7, $fa2 + fcvt.s.d $fa7, $fa7 + fst.s $fa7, $a7, 0 addi.d $a7, $a7, 4 addi.d $t0, $t0, 1 bne $a2, $t1, .LBB1_153 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBvhTriangleMeshShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBvhTriangleMeshShape.s index 604011e..5f68827 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBvhTriangleMeshShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBvhTriangleMeshShape.s @@ -1663,19 +1663,16 @@ _ZZNK22btBvhTriangleMeshShape19processAllTrianglesEP18btTriangleCallbackRK9btVec ld.w $a3, $a3, 0 .LBB22_6: # %.split38.us mul.w $a2, $a2, $a3 - fldx.d $fa0, $a1, $a2 - fld.s $fa1, $a0, 8 + vldx $vr0, $a1, $a2 add.d $a1, $a1, $a2 - fcvt.s.d $fa0, $fa0 - fmul.s $fa0, $fa1, $fa0 - fld.d $fa1, $a1, 8 - fld.s $fa2, $a0, 12 - fld.d $fa3, $a1, 16 - fld.s $fa4, $a0, 16 + ld.d $a2, $a0, 8 + vfcvt.s.d $vr0, $vr0, $vr0 + fld.d $fa1, $a1, 16 + fld.s $fa2, $a0, 16 + vinsgr2vr.d $vr3, $a2, 0 + vfmul.s $vr0, $vr3, $vr0 fcvt.s.d $fa1, $fa1 fmul.s $fa1, $fa2, $fa1 - fcvt.s.d $fa2, $fa3 - fmul.s $fa2, $fa4, $fa2 b .LBB22_9 .LBB22_7: # %.split.us.split.preheader ld.w $a4, $a3, 8 @@ -1714,20 +1711,19 @@ _ZZNK22btBvhTriangleMeshShape19processAllTrianglesEP18btTriangleCallbackRK9btVec .LBB22_8: # %.split38.us mul.w $a2, $a2, $a3 add.d $a3, $a1, $a2 - fldx.s $fa0, $a1, $a2 - fld.s $fa1, $a0, 8 - fld.s $fa2, $a3, 4 - fld.s $fa3, $a0, 12 - fld.s $fa4, $a3, 8 - fld.s $fa5, $a0, 16 - fmul.s $fa0, $fa0, $fa1 - fmul.s $fa1, $fa2, $fa3 - fmul.s $fa2, $fa4, $fa5 + ldx.d $a1, $a1, $a2 + ld.d $a2, $a0, 8 + fld.s $fa1, $a3, 8 + fld.s $fa2, $a0, 16 + vinsgr2vr.d $vr0, $a1, 0 + vinsgr2vr.d $vr3, $a2, 0 + vfmul.s $vr0, $vr0, $vr3 + fmul.s $fa1, $fa1, $fa2 .LBB22_9: # %.split38.us ld.d $a0, $fp, 16 - fst.s $fa0, $fp, 24 - fst.s $fa1, $fp, 28 - fst.s $fa2, $fp, 32 + vstelm.w $vr0, $fp, 24, 0 + vstelm.w $vr0, $fp, 28, 1 + fst.s $fa1, $fp, 32 ld.d $a1, $a0, 0 st.w $zero, $fp, 36 ld.d $a4, $a1, 16 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s index eee5006..32d544a 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s @@ -682,9 +682,18 @@ free_tracks: # @free_tracks .word 5 # 0x5 .word 6 # 0x6 .word 7 # 0x7 +.LCPI6_1: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 .section .rodata.cst8,"aM",@progbits,8 .p2align 3, 0x0 -.LCPI6_1: +.LCPI6_2: .dword 0x400921fb54442d18 # double 3.1415926535897931 .text .globl generate_polar_angles @@ -718,49 +727,30 @@ generate_polar_angles: # @generate_polar_angles xvldi $xr3, -928 lu12i.w $a2, 345154 ori $a2, $a2, 3352 + pcalau12i $a3, %pc_hi20(.LCPI6_1) + xvld $xr4, $a3, %pc_lo12(.LCPI6_1) lu32i.d $a2, -450053 lu52i.d $a2, $a2, 1024 - xvreplgr2vr.d $xr4, $a2 + xvreplgr2vr.d $xr5, $a2 move $a2, $a0 move $a3, $a1 .p2align 4, , 16 .LBB6_4: # %vector.body # =>This Inner Loop Header: Depth=1 - xvpermi.q $xr5, $xr1, 1 - vext2xv.du.wu $xr5, $xr5 - xvffint.d.lu $xr5, $xr5 - vext2xv.du.wu $xr6, $xr1 + xvpermi.q $xr6, $xr1, 1 + vext2xv.du.wu $xr6, $xr6 xvffint.d.lu $xr6, $xr6 - xvfadd.d $xr5, $xr5, $xr3 + vext2xv.du.wu $xr7, $xr1 + xvffint.d.lu $xr7, $xr7 + xvfadd.d $xr7, $xr7, $xr3 xvfadd.d $xr6, $xr6, $xr3 - xvfmul.d $xr6, $xr6, $xr4 - xvfmul.d $xr5, $xr5, $xr4 - xvfdiv.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvfdiv.d $xr7, $xr7, $xr2 xvfdiv.d $xr6, $xr6, $xr2 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr8, $xr7, 2 - xvst $xr8, $a2, 0 + xvfcvt.s.d $xr6, $xr6, $xr7 + xvperm.w $xr6, $xr6, $xr4 + xvst $xr6, $a2, 0 xvaddi.wu $xr1, $xr1, 8 addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 @@ -768,8 +758,8 @@ generate_polar_angles: # @generate_polar_angles # %bb.5: # %middle.block beq $a1, $fp, .LBB6_8 .LBB6_6: # %scalar.ph.preheader - pcalau12i $a2, %pc_hi20(.LCPI6_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI6_1) + pcalau12i $a2, %pc_hi20(.LCPI6_2) + fld.d $fa1, $a2, %pc_lo12(.LCPI6_2) alsl.d $a2, $a1, $a0, 2 sub.d $a3, $fp, $a1 vldi $vr2, -928 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s index 755e4e3..411d1c6 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s @@ -812,33 +812,24 @@ start_pass_fdctmgr: # @start_pass_fdctmgr ffint.d.w $fa5, $fa1 vextrins.d $vr5, $vr4, 16 vreplvei.d $vr1, $vr0, 0 + vfmul.d $vr4, $vr1, $vr5 + xvori.b $xr5, $xr0, 0 fst.d $fs0, $sp, 184 fst.d $fs1, $sp, 176 fst.d $fs2, $sp, 168 fst.d $fa3, $sp, 160 xvld $xr3, $sp, 160 - vfmul.d $vr4, $vr1, $vr5 - xvori.b $xr5, $xr0, 0 xvinsve0.d $xr5, $xr2, 1 xvpermi.q $xr5, $xr4, 2 + ld.hu $a3, $a2, 0 xvfmul.d $xr2, $xr5, $xr3 xvfmul.d $xr2, $xr2, $xr6 xvfrecip.d $xr2, $xr2 - xvpickve.d $xr3, $xr2, 1 - fcvt.s.d $fa3, $fa3 - xvpickve.d $xr4, $xr2, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 2 - ld.hu $a3, $a2, 0 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr4, $vr3, 32 - xvpickve.d $xr2, $xr2, 3 movgr2fr.w $fa3, $a3 ld.hu $a3, $a2, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - vst $vr4, $a0, -16 + xvpermi.q $xr4, $xr2, 1 + vfcvt.s.d $vr2, $vr4, $vr2 + vst $vr2, $a0, -16 movgr2fr.w $fa2, $a3 ld.w $a3, $a2, 4 ffint.d.w $fa3, $fa3 @@ -865,18 +856,9 @@ start_pass_fdctmgr: # @start_pass_fdctmgr xvfmul.d $xr0, $xr0, $xr3 xvfmul.d $xr0, $xr0, $xr6 xvfrecip.d $xr0, $xr0 - xvpickve.d $xr1, $xr0, 1 - fcvt.s.d $fa1, $fa1 - xvpickve.d $xr2, $xr0, 0 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr2, $vr1, 16 - xvpickve.d $xr1, $xr0, 2 - fcvt.s.d $fa1, $fa1 - vextrins.w $vr2, $vr1, 32 - xvpickve.d $xr0, $xr0, 3 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr2, $vr0, 48 - vst $vr2, $a0, 0 + xvpermi.q $xr1, $xr0, 1 + vfcvt.s.d $vr0, $vr1, $vr0 + vst $vr0, $a0, 0 addi.d $a1, $a1, 8 addi.d $a2, $a2, 16 addi.d $a0, $a0, 32 diff --git a/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/fourierf.s b/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/fourierf.s index 84a4ae3..005acfa 100644 --- a/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/fourierf.s +++ b/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/fourierf.s @@ -20,6 +20,17 @@ .dword 0xbfe921fb54442d18 # double -0.78539816339744828 .LCPI0_7: .dword 0xbfc921fb54442d18 # double -0.19634954084936207 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI0_8: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 .text .globl fft_float .p2align 5 @@ -347,115 +358,75 @@ fft_float: # @fft_float bnez $a0, .LBB0_28 b .LBB0_20 .LBB0_35: # %vector.ph + pcalau12i $a0, %pc_hi20(.LCPI0_8) + xvld $xr1, $a0, %pc_lo12(.LCPI0_8) bstrpick.d $a0, $s7, 31, 3 slli.d $a0, $a0, 3 - xvreplve0.d $xr1, $xr0 + xvreplve0.d $xr2, $xr0 move $a1, $s0 move $a2, $fp move $a3, $a0 .p2align 4, , 16 .LBB0_36: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr2, $a1, 0 - xvpermi.q $xr3, $xr2, 1 - vreplvei.w $vr4, $vr3, 3 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr3, 2 + xvld $xr3, $a1, 0 + xvpermi.q $xr4, $xr3, 1 + vreplvei.w $vr5, $vr4, 3 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr3, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr3, $vr3, 0 - fcvt.d.s $fa3, $fa3 - vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr3, $xr5, 2 - vreplvei.w $vr4, $vr2, 3 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr2, 2 + vreplvei.w $vr6, $vr4, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr5, 16 + vreplvei.w $vr5, $vr4, 1 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr2, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr2, $vr2, 0 - fcvt.d.s $fa2, $fa2 - vextrins.d $vr2, $vr4, 16 - xvpermi.q $xr2, $xr5, 2 - xvfdiv.d $xr3, $xr3, $xr1 - xvfdiv.d $xr2, $xr2, $xr1 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpickve.d $xr3, $xr2, 1 - fcvt.s.d $fa3, $fa3 - xvpickve.d $xr4, $xr2, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 2 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr4, $vr3, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - xvld $xr3, $a2, 0 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr4, $xr5, 2 - xvst $xr4, $a1, 0 - xvpermi.q $xr2, $xr3, 1 - vreplvei.w $vr4, $vr2, 3 + vreplvei.w $vr4, $vr4, 0 fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr2, 2 + vextrins.d $vr4, $vr5, 16 + xvpermi.q $xr4, $xr6, 2 + vreplvei.w $vr5, $vr3, 3 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr2, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr2, $vr2, 0 - fcvt.d.s $fa2, $fa2 - vextrins.d $vr2, $vr4, 16 - xvpermi.q $xr2, $xr5, 2 + vreplvei.w $vr6, $vr3, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr5, 16 + vreplvei.w $vr5, $vr3, 1 + fcvt.d.s $fa5, $fa5 + vreplvei.w $vr3, $vr3, 0 + fcvt.d.s $fa3, $fa3 + vextrins.d $vr3, $vr5, 16 + xvpermi.q $xr3, $xr6, 2 + xvfdiv.d $xr4, $xr4, $xr2 + xvfdiv.d $xr3, $xr3, $xr2 + xvld $xr5, $a2, 0 + xvfcvt.s.d $xr3, $xr4, $xr3 + xvperm.w $xr3, $xr3, $xr1 + xvst $xr3, $a1, 0 + xvpermi.q $xr3, $xr5, 1 vreplvei.w $vr4, $vr3, 3 fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr3, 2 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 + vreplvei.w $vr6, $vr3, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr4, 16 vreplvei.w $vr4, $vr3, 1 fcvt.d.s $fa4, $fa4 vreplvei.w $vr3, $vr3, 0 fcvt.d.s $fa3, $fa3 vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr3, $xr5, 2 - xvfdiv.d $xr2, $xr2, $xr1 - xvfdiv.d $xr3, $xr3, $xr1 - xvpickve.d $xr4, $xr2, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr2, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr2, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr5, $vr2, 48 - xvpickve.d $xr2, $xr3, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr4, $xr3, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr2, 16 - xvpickve.d $xr2, $xr3, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 32 - xvpickve.d $xr2, $xr3, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr4, $xr5, 2 - xvst $xr4, $a2, 0 + xvpermi.q $xr3, $xr6, 2 + vreplvei.w $vr4, $vr5, 3 + fcvt.d.s $fa4, $fa4 + vreplvei.w $vr6, $vr5, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr4, 16 + vreplvei.w $vr4, $vr5, 1 + fcvt.d.s $fa4, $fa4 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr6, 2 + xvfdiv.d $xr3, $xr3, $xr2 + xvfdiv.d $xr4, $xr5, $xr2 + xvfcvt.s.d $xr3, $xr3, $xr4 + xvperm.w $xr3, $xr3, $xr1 + xvst $xr3, $a2, 0 addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 addi.d $a1, $a1, 32 @@ -538,6 +509,17 @@ fft_float: # @fft_float .dword 0xbfe921fb54442d18 # double -0.78539816339744828 .LCPI1_7: .dword 0xbfc921fb54442d18 # double -0.19634954084936207 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI1_8: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 .text .globl fft_float_StrictFP .p2align 5 @@ -868,115 +850,75 @@ fft_float_StrictFP: # @fft_float_StrictFP bnez $a0, .LBB1_28 b .LBB1_20 .LBB1_35: # %vector.ph + pcalau12i $a0, %pc_hi20(.LCPI1_8) + xvld $xr1, $a0, %pc_lo12(.LCPI1_8) bstrpick.d $a0, $s7, 31, 3 slli.d $a0, $a0, 3 - xvreplve0.d $xr1, $xr0 + xvreplve0.d $xr2, $xr0 move $a1, $s0 move $a2, $fp move $a3, $a0 .p2align 4, , 16 .LBB1_36: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr2, $a1, 0 - xvpermi.q $xr3, $xr2, 1 - vreplvei.w $vr4, $vr3, 3 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr3, 2 + xvld $xr3, $a1, 0 + xvpermi.q $xr4, $xr3, 1 + vreplvei.w $vr5, $vr4, 3 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr3, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr3, $vr3, 0 - fcvt.d.s $fa3, $fa3 - vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr3, $xr5, 2 - vreplvei.w $vr4, $vr2, 3 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr2, 2 + vreplvei.w $vr6, $vr4, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr5, 16 + vreplvei.w $vr5, $vr4, 1 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr2, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr2, $vr2, 0 - fcvt.d.s $fa2, $fa2 - vextrins.d $vr2, $vr4, 16 - xvpermi.q $xr2, $xr5, 2 - xvfdiv.d $xr3, $xr3, $xr1 - xvfdiv.d $xr2, $xr2, $xr1 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpickve.d $xr3, $xr2, 1 - fcvt.s.d $fa3, $fa3 - xvpickve.d $xr4, $xr2, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 2 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr4, $vr3, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - xvld $xr3, $a2, 0 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr4, $xr5, 2 - xvst $xr4, $a1, 0 - xvpermi.q $xr2, $xr3, 1 - vreplvei.w $vr4, $vr2, 3 + vreplvei.w $vr4, $vr4, 0 fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr2, 2 + vextrins.d $vr4, $vr5, 16 + xvpermi.q $xr4, $xr6, 2 + vreplvei.w $vr5, $vr3, 3 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr2, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr2, $vr2, 0 - fcvt.d.s $fa2, $fa2 - vextrins.d $vr2, $vr4, 16 - xvpermi.q $xr2, $xr5, 2 + vreplvei.w $vr6, $vr3, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr5, 16 + vreplvei.w $vr5, $vr3, 1 + fcvt.d.s $fa5, $fa5 + vreplvei.w $vr3, $vr3, 0 + fcvt.d.s $fa3, $fa3 + vextrins.d $vr3, $vr5, 16 + xvpermi.q $xr3, $xr6, 2 + xvfdiv.d $xr4, $xr4, $xr2 + xvfdiv.d $xr3, $xr3, $xr2 + xvld $xr5, $a2, 0 + xvfcvt.s.d $xr3, $xr4, $xr3 + xvperm.w $xr3, $xr3, $xr1 + xvst $xr3, $a1, 0 + xvpermi.q $xr3, $xr5, 1 vreplvei.w $vr4, $vr3, 3 fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr3, 2 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 + vreplvei.w $vr6, $vr3, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr4, 16 vreplvei.w $vr4, $vr3, 1 fcvt.d.s $fa4, $fa4 vreplvei.w $vr3, $vr3, 0 fcvt.d.s $fa3, $fa3 vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr3, $xr5, 2 - xvfdiv.d $xr2, $xr2, $xr1 - xvfdiv.d $xr3, $xr3, $xr1 - xvpickve.d $xr4, $xr2, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr2, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr2, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr5, $vr2, 48 - xvpickve.d $xr2, $xr3, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr4, $xr3, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr2, 16 - xvpickve.d $xr2, $xr3, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 32 - xvpickve.d $xr2, $xr3, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr4, $xr5, 2 - xvst $xr4, $a2, 0 + xvpermi.q $xr3, $xr6, 2 + vreplvei.w $vr4, $vr5, 3 + fcvt.d.s $fa4, $fa4 + vreplvei.w $vr6, $vr5, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr4, 16 + vreplvei.w $vr4, $vr5, 1 + fcvt.d.s $fa4, $fa4 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr6, 2 + xvfdiv.d $xr3, $xr3, $xr2 + xvfdiv.d $xr4, $xr5, $xr2 + xvfcvt.s.d $xr3, $xr3, $xr4 + xvperm.w $xr3, $xr3, $xr1 + xvst $xr3, $a2, 0 addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 addi.d $a1, $a1, 32 diff --git a/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s b/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s index 6dd7cd4..d40a118 100644 --- a/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s +++ b/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s @@ -1,6 +1,17 @@ .file "sradKernel.c" + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 # -- Begin function srad_kernel +.LCPI0_0: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 .text - .globl srad_kernel # -- Begin function srad_kernel + .globl srad_kernel .p2align 5 .type srad_kernel,@function srad_kernel: # @srad_kernel @@ -17,33 +28,33 @@ srad_kernel: # @srad_kernel st.d $s6, $sp, 264 # 8-byte Folded Spill st.d $s7, $sp, 256 # 8-byte Folded Spill st.d $s8, $sp, 248 # 8-byte Folded Spill - move $s5, $a3 - move $s7, $a1 + move $s6, $a3 + move $s8, $a1 move $s1, $zero lu12i.w $s0, 2047 add.d $s2, $a6, $s0 lu12i.w $a1, 2046 ori $t7, $a1, 4092 add.d $t8, $a0, $t7 - add.d $a4, $s7, $t7 + add.d $a4, $s8, $t7 add.d $a3, $a2, $t7 - add.d $fp, $s5, $t7 + add.d $fp, $s6, $t7 addu16i.d $a7, $a5, 128 addi.d $a7, $a7, -4 lu12i.w $t0, 1 ori $s3, $t0, 4 add.d $s4, $a0, $s3 - add.d $s6, $s7, $s3 - add.d $s8, $a2, $s3 - add.d $ra, $s5, $s3 + add.d $s5, $s8, $s3 + add.d $s7, $a2, $s3 + add.d $ra, $s6, $s3 add.d $t1, $a6, $s3 add.d $t2, $a6, $t7 addi.d $t3, $a5, 4 sltu $t4, $s4, $a4 - sltu $t5, $s6, $t8 + sltu $t5, $s5, $t8 and $t4, $t4, $t5 sltu $t5, $s4, $a3 - sltu $t6, $s8, $t8 + sltu $t6, $s7, $t8 and $t5, $t5, $t6 or $t4, $t4, $t5 sltu $t5, $s4, $fp @@ -59,33 +70,33 @@ srad_kernel: # @srad_kernel sltu $t6, $t3, $t8 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s6, $a3 - sltu $t6, $s8, $a4 + sltu $t5, $s5, $a3 + sltu $t6, $s7, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s6, $fp + sltu $t5, $s5, $fp sltu $t6, $ra, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s6, $t2 + sltu $t5, $s5, $t2 sltu $t6, $t1, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s6, $sp, 96 # 8-byte Folded Spill - sltu $t5, $s6, $a7 + st.d $s5, $sp, 96 # 8-byte Folded Spill + sltu $t5, $s5, $a7 sltu $t6, $t3, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s8, $fp + sltu $t5, $s7, $fp sltu $t6, $ra, $a3 and $t5, $t5, $t6 or $t4, $t4, $t5 - sltu $t5, $s8, $t2 + sltu $t5, $s7, $t2 sltu $t6, $t1, $a3 and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s8, $sp, 88 # 8-byte Folded Spill - sltu $t5, $s8, $a7 + st.d $s7, $sp, 88 # 8-byte Folded Spill + sltu $t5, $s7, $a7 sltu $t6, $t3, $a3 and $t5, $t5, $t6 or $t4, $t4, $t5 @@ -106,7 +117,7 @@ srad_kernel: # @srad_kernel and $t1, $t1, $t2 or $t1, $t4, $t1 sltu $t2, $s4, $t3 - st.d $s2, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill sltu $t4, $s2, $a7 and $t2, $t2, $t4 addu16i.d $t4, $a0, 128 @@ -115,32 +126,32 @@ srad_kernel: # @srad_kernel sltu $t5, $t6, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 - addu16i.d $t4, $s7, 128 + addu16i.d $t4, $s8, 128 addi.d $t4, $t4, -4 sltu $t4, $s4, $t4 - add.d $s2, $s7, $s0 + add.d $s2, $s8, $s0 sltu $t5, $s2, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 addu16i.d $t4, $a2, 128 addi.d $t4, $t4, -4 sltu $t4, $s4, $t4 - add.d $s6, $a2, $s0 - sltu $t5, $s6, $a7 + add.d $s5, $a2, $s0 + sltu $t5, $s5, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 - addu16i.d $t4, $s5, 128 + addu16i.d $t4, $s6, 128 addi.d $t4, $t4, -4 st.d $s4, $sp, 72 # 8-byte Folded Spill sltu $t4, $s4, $t4 - add.d $s4, $s5, $s0 + add.d $s4, $s6, $s0 sltu $a7, $s4, $a7 and $a7, $t4, $a7 or $a7, $t2, $a7 st.d $a7, $sp, 64 # 8-byte Folded Spill addi.d $a7, $t3, -4 add.d $t3, $a5, $t7 - st.d $a7, $sp, 216 # 8-byte Folded Spill + st.d $a7, $sp, 208 # 8-byte Folded Spill sltu $a7, $a5, $a7 sltu $t2, $a6, $t3 and $a7, $a7, $t2 @@ -149,17 +160,17 @@ srad_kernel: # @srad_kernel and $t2, $t4, $t2 or $t2, $a7, $t2 sltu $a4, $a5, $a4 - sltu $a7, $s7, $t3 + sltu $a7, $s8, $t3 and $a4, $a4, $a7 or $a4, $t2, $a4 sltu $a3, $a5, $a3 sltu $a7, $a2, $t3 and $a3, $a3, $a7 add.d $a1, $a5, $a1 - st.d $a1, $sp, 224 # 8-byte Folded Spill + st.d $a1, $sp, 216 # 8-byte Folded Spill or $a1, $a4, $a3 sltu $a4, $a5, $fp - sltu $a3, $s5, $t3 + sltu $a3, $s6, $t3 and $a3, $a4, $a3 or $a1, $a1, $a3 st.d $a1, $sp, 240 # 8-byte Folded Spill @@ -170,8 +181,8 @@ srad_kernel: # @srad_kernel lu12i.w $a1, 231424 movgr2fr.w $fa1, $a1 vldi $vr2, -928 - lu52i.d $t2, $zero, -1029 - movgr2fr.d $fa3, $t2 + lu52i.d $a1, $zero, -1029 + movgr2fr.d $fa3, $a1 vldi $vr4, -944 vldi $vr5, -912 vldi $vr6, -1168 @@ -185,32 +196,32 @@ srad_kernel: # @srad_kernel add.d $a1, $a5, $s3 st.d $a1, $sp, 48 # 8-byte Folded Spill add.d $a1, $a6, $t0 - st.d $a1, $sp, 176 # 8-byte Folded Spill - add.d $a1, $s5, $t0 + st.d $a1, $sp, 168 # 8-byte Folded Spill + add.d $a1, $s6, $t0 st.d $a1, $sp, 40 # 8-byte Folded Spill - add.d $a1, $a0, $t0 - st.d $a1, $sp, 32 # 8-byte Folded Spill - add.d $a1, $a5, $t0 - st.d $a1, $sp, 24 # 8-byte Folded Spill - add.d $a1, $s7, $t0 - st.d $a1, $sp, 16 # 8-byte Folded Spill - add.d $a1, $a2, $t0 - st.d $a1, $sp, 8 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill + add.d $a0, $a0, $t0 + st.d $a0, $sp, 32 # 8-byte Folded Spill + add.d $a0, $a5, $t0 + st.d $a0, $sp, 24 # 8-byte Folded Spill + add.d $a0, $s8, $t0 + st.d $a0, $sp, 16 # 8-byte Folded Spill + add.d $a0, $a2, $t0 + st.d $a0, $sp, 8 # 8-byte Folded Spill ori $t1, $zero, 512 - ori $a7, $zero, 128 - ori $t4, $zero, 4092 + ori $a4, $zero, 128 + ori $a7, $zero, 4092 lu12i.w $t3, -1 ori $t5, $zero, 4064 - st.d $a6, $sp, 152 # 8-byte Folded Spill - st.d $a5, $sp, 144 # 8-byte Folded Spill - st.d $s5, $sp, 136 # 8-byte Folded Spill - st.d $a2, $sp, 128 # 8-byte Folded Spill - st.d $s7, $sp, 120 # 8-byte Folded Spill - st.d $a0, $sp, 112 # 8-byte Folded Spill - st.d $t6, $sp, 208 # 8-byte Folded Spill - st.d $s2, $sp, 200 # 8-byte Folded Spill - st.d $s6, $sp, 192 # 8-byte Folded Spill - st.d $s4, $sp, 184 # 8-byte Folded Spill + st.d $a6, $sp, 144 # 8-byte Folded Spill + st.d $a5, $sp, 136 # 8-byte Folded Spill + st.d $s6, $sp, 128 # 8-byte Folded Spill + st.d $a2, $sp, 120 # 8-byte Folded Spill + st.d $s8, $sp, 112 # 8-byte Folded Spill + st.d $t6, $sp, 200 # 8-byte Folded Spill + st.d $s2, $sp, 192 # 8-byte Folded Spill + st.d $s5, $sp, 184 # 8-byte Folded Spill + st.d $s4, $sp, 176 # 8-byte Folded Spill .p2align 4, , 16 .LBB0_1: # %.preheader624 # =>This Loop Header: Depth=1 @@ -224,11 +235,10 @@ srad_kernel: # @srad_kernel # Child Loop BB0_59 Depth 2 # Child Loop BB0_62 Depth 3 # Child Loop BB0_65 Depth 3 - move $a4, $zero + move $a0, $zero move $a1, $a5 fmov.s $ft2, $fa0 fmov.s $ft1, $fa0 - lu52i.d $s3, $zero, -1175 .p2align 4, , 16 .LBB0_2: # %.preheader622 # Parent Loop BB0_1 Depth=1 @@ -246,16 +256,17 @@ srad_kernel: # @srad_kernel fadd.s $ft1, $ft1, $ft3 bne $a3, $t1, .LBB0_3 # %bb.4: # in Loop: Header=BB0_2 Depth=2 - addi.d $a4, $a4, 1 + addi.d $a0, $a0, 1 add.d $a1, $a1, $t0 - bne $a4, $a7, .LBB0_2 + bne $a0, $a4, .LBB0_2 # %bb.5: # in Loop: Header=BB0_1 Depth=1 fld.s $ft3, $a5, 0 fsub.s $ft4, $ft3, $ft3 + ld.d $a0, $sp, 224 # 8-byte Folded Reload fst.s $ft4, $a0, 0 fldx.s $ft4, $a5, $t0 fsub.s $ft4, $ft4, $ft3 - fst.s $ft4, $s7, 0 + fst.s $ft4, $s8, 0 fld.s $ft4, $a5, 0 fmul.s $ft2, $ft2, $fa1 fsub.s $ft4, $ft4, $ft3 @@ -265,9 +276,9 @@ srad_kernel: # @srad_kernel fmul.s $ft2, $ft2, $ft2 fsub.s $ft1, $ft1, $ft2 fsub.s $ft4, $ft4, $ft3 - fst.s $ft4, $s5, 0 + fst.s $ft4, $s6, 0 fld.s $ft5, $a0, 0 - fld.s $ft6, $s7, 0 + fld.s $ft6, $s8, 0 fdiv.s $ft1, $ft1, $ft2 fld.s $ft2, $a2, 0 fmul.s $ft7, $ft5, $ft5 @@ -307,7 +318,7 @@ srad_kernel: # @srad_kernel fcmp.clt.d $fcc0, $ft3, $fa7 fst.s $ft4, $a6, 0 fmov.s $ft4, $fa0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload bcnez $fcc0, .LBB0_8 # %bb.6: # in Loop: Header=BB0_1 Depth=1 movgr2fr.d $ft4, $ra @@ -319,23 +330,24 @@ srad_kernel: # @srad_kernel # in Loop: Header=BB0_1 Depth=1 fst.s $ft4, $a6, 0 .LBB0_9: # in Loop: Header=BB0_1 Depth=1 - fldx.s $ft3, $a5, $t4 + fldx.s $ft3, $a5, $a7 fsub.s $ft4, $ft3, $ft3 - fstx.s $ft4, $a0, $t4 + ld.d $a0, $sp, 224 # 8-byte Folded Reload + fstx.s $ft4, $a0, $a7 ori $a3, $t0, 4092 fldx.s $ft4, $a5, $a3 fsub.s $ft4, $ft4, $ft3 - fstx.s $ft4, $s7, $t4 + fstx.s $ft4, $s8, $a7 ori $a3, $zero, 4088 fldx.s $ft4, $a5, $a3 fsub.s $ft4, $ft4, $ft3 - fstx.s $ft4, $a2, $t4 - fldx.s $ft4, $a5, $t4 + fstx.s $ft4, $a2, $a7 + fldx.s $ft4, $a5, $a7 fsub.s $ft4, $ft4, $ft3 - fstx.s $ft4, $s5, $t4 - fldx.s $ft5, $a0, $t4 - fldx.s $ft6, $s7, $t4 - fldx.s $ft7, $a2, $t4 + fstx.s $ft4, $s6, $a7 + fldx.s $ft5, $a0, $a7 + fldx.s $ft6, $s8, $a7 + fldx.s $ft7, $a2, $a7 fmul.s $ft8, $ft5, $ft5 fmul.s $ft9, $ft6, $ft6 fadd.s $ft8, $ft8, $ft9 @@ -369,7 +381,7 @@ srad_kernel: # @srad_kernel frecip.d $ft3, $ft3 fcvt.s.d $ft4, $ft3 fcmp.clt.d $fcc0, $ft3, $fa7 - fstx.s $ft4, $a6, $t4 + fstx.s $ft4, $a6, $a7 fmov.s $ft4, $fa0 bcnez $fcc0, .LBB0_12 # %bb.10: # in Loop: Header=BB0_1 Depth=1 @@ -380,26 +392,26 @@ srad_kernel: # @srad_kernel vldi $vr12, -1168 .LBB0_12: # %.sink.split663 # in Loop: Header=BB0_1 Depth=1 - fstx.s $ft4, $a6, $t4 + fstx.s $ft4, $a6, $a7 .LBB0_13: # in Loop: Header=BB0_1 Depth=1 - ld.d $a4, $sp, 224 # 8-byte Folded Reload - fldx.s $ft3, $a4, $t0 - fld.s $ft4, $a4, 0 + ld.d $a3, $sp, 216 # 8-byte Folded Reload + fldx.s $ft3, $a3, $t0 + fld.s $ft4, $a3, 0 fsub.s $ft4, $ft4, $ft3 fst.s $ft4, $t6, 0 - fldx.s $ft4, $a4, $t0 + fldx.s $ft4, $a3, $t0 fsub.s $ft4, $ft4, $ft3 fst.s $ft4, $s2, 0 - fldx.s $ft4, $a4, $t0 + fldx.s $ft4, $a3, $t0 fsub.s $ft4, $ft4, $ft3 - fst.s $ft4, $s6, 0 - ori $a3, $t0, 4 - fldx.s $ft4, $a4, $a3 + fst.s $ft4, $s5, 0 + ori $a0, $t0, 4 + fldx.s $ft4, $a3, $a0 fsub.s $ft4, $ft4, $ft3 fst.s $ft4, $s4, 0 fld.s $ft5, $t6, 0 fld.s $ft6, $s2, 0 - fld.s $ft7, $s6, 0 + fld.s $ft7, $s5, 0 fmul.s $ft8, $ft5, $ft5 fmul.s $ft9, $ft6, $ft6 fadd.s $ft8, $ft8, $ft9 @@ -446,25 +458,25 @@ srad_kernel: # @srad_kernel # in Loop: Header=BB0_1 Depth=1 fst.s $ft4, $a1, 0 .LBB0_17: # in Loop: Header=BB0_1 Depth=1 - ld.d $a1, $sp, 224 # 8-byte Folded Reload - ori $a3, $t0, 4092 - fldx.s $ft3, $a1, $a3 - fldx.s $ft4, $a1, $t4 + ld.d $a0, $sp, 216 # 8-byte Folded Reload + ori $a1, $t0, 4092 + fldx.s $ft3, $a0, $a1 + fldx.s $ft4, $a0, $a7 fsub.s $ft4, $ft4, $ft3 - fstx.s $ft4, $t6, $t4 - fldx.s $ft4, $a1, $a3 + fstx.s $ft4, $t6, $a7 + fldx.s $ft4, $a0, $a1 fsub.s $ft4, $ft4, $ft3 - fstx.s $ft4, $s2, $t4 + fstx.s $ft4, $s2, $a7 ori $a2, $t0, 4088 - fldx.s $ft4, $a1, $a2 + fldx.s $ft4, $a0, $a2 fsub.s $ft4, $ft4, $ft3 - fstx.s $ft4, $s6, $t4 - fldx.s $ft4, $a1, $a3 + fstx.s $ft4, $s5, $a7 + fldx.s $ft4, $a0, $a1 fsub.s $ft4, $ft4, $ft3 - fstx.s $ft4, $s4, $t4 - fldx.s $ft5, $t6, $t4 - fldx.s $ft6, $s2, $t4 - fldx.s $ft7, $s6, $t4 + fstx.s $ft4, $s4, $a7 + fldx.s $ft5, $t6, $a7 + fldx.s $ft6, $s2, $a7 + fldx.s $ft7, $s5, $a7 fmul.s $ft8, $ft5, $ft5 fmul.s $ft9, $ft6, $ft6 fadd.s $ft8, $ft8, $ft9 @@ -498,105 +510,104 @@ srad_kernel: # @srad_kernel frecip.d $ft3, $ft3 fcvt.s.d $ft4, $ft3 fcmp.clt.d $fcc0, $ft3, $fa7 - ld.d $a1, $sp, 216 # 8-byte Folded Reload - fst.s $ft4, $a1, 0 + ld.d $a0, $sp, 208 # 8-byte Folded Reload + fst.s $ft4, $a0, 0 bceqz $fcc0, .LBB0_19 # %bb.18: # in Loop: Header=BB0_1 Depth=1 movgr2fr.w $ft3, $zero - ori $s5, $zero, 4068 b .LBB0_21 .p2align 4, , 16 .LBB0_19: # in Loop: Header=BB0_1 Depth=1 movgr2fr.d $ft4, $ra fcmp.cule.d $fcc0, $ft3, $ft4 - ori $s5, $zero, 4068 bcnez $fcc0, .LBB0_22 # %bb.20: # in Loop: Header=BB0_1 Depth=1 vldi $vr11, -1168 .LBB0_21: # %.sink.split667 # in Loop: Header=BB0_1 Depth=1 - ld.d $a1, $sp, 216 # 8-byte Folded Reload - fst.s $ft3, $a1, 0 + ld.d $a0, $sp, 208 # 8-byte Folded Reload + fst.s $ft3, $a0, 0 .LBB0_22: # %.preheader621.preheader # in Loop: Header=BB0_1 Depth=1 - st.d $s1, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill xvreplve0.w $xr11, $xr9 xvreplve0.w $xr12, $xr10 - ori $a1, $zero, 1 - ld.d $t4, $sp, 8 # 8-byte Folded Reload - ld.d $a3, $sp, 16 # 8-byte Folded Reload - ld.d $a4, $sp, 24 # 8-byte Folded Reload - ld.d $s6, $sp, 32 # 8-byte Folded Reload - ld.d $s7, $sp, 40 # 8-byte Folded Reload - ld.d $s2, $sp, 176 # 8-byte Folded Reload - ld.d $a7, $sp, 48 # 8-byte Folded Reload - ld.d $t7, $sp, 104 # 8-byte Folded Reload - ld.d $s0, $sp, 96 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload - ld.d $t8, $sp, 80 # 8-byte Folded Reload - move $fp, $a6 + ori $a0, $zero, 1 + ld.d $a3, $sp, 8 # 8-byte Folded Reload + ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload + ld.d $s7, $sp, 32 # 8-byte Folded Reload + ld.d $s2, $sp, 40 # 8-byte Folded Reload + ld.d $s1, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $s0, $sp, 104 # 8-byte Folded Reload + ld.d $t6, $sp, 96 # 8-byte Folded Reload + ld.d $t8, $sp, 88 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + move $s4, $a6 b .LBB0_24 .p2align 4, , 16 .LBB0_23: # in Loop: Header=BB0_24 Depth=2 - addi.d $a1, $a1, 1 + addi.d $a0, $a0, 1 + add.d $s4, $s4, $t0 add.d $fp, $fp, $t0 add.d $t8, $t8, $t0 add.d $t6, $t6, $t0 add.d $s0, $s0, $t0 add.d $t7, $t7, $t0 - add.d $a7, $a7, $t0 + add.d $s1, $s1, $t0 add.d $s2, $s2, $t0 add.d $s7, $s7, $t0 add.d $s6, $s6, $t0 - add.d $a4, $a4, $t0 + add.d $a2, $a2, $t0 add.d $a3, $a3, $t0 - add.d $t4, $t4, $t0 - lu52i.d $t2, $zero, -1029 - lu52i.d $s3, $zero, -1175 - ori $a0, $zero, 2047 - ori $s5, $zero, 4068 - beq $a1, $a0, .LBB0_51 + ori $a1, $zero, 2047 + beq $a0, $a1, .LBB0_51 .LBB0_24: # %.preheader621 # Parent Loop BB0_1 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_27 Depth 3 # Child Loop BB0_48 Depth 3 - ori $a0, $zero, 1 - ld.d $a5, $sp, 232 # 8-byte Folded Reload - bnez $a5, .LBB0_44 + pcalau12i $a4, %pc_hi20(.LCPI0_0) + ori $a5, $zero, 1 + ld.d $a1, $sp, 232 # 8-byte Folded Reload + bnez $a1, .LBB0_44 # %bb.25: # %vector.body814.preheader # in Loop: Header=BB0_24 Depth=2 - ori $s4, $t3, 32 + ori $s8, $t3, 32 + lu52i.d $a1, $zero, -1029 + lu52i.d $t2, $zero, -1175 + ori $s3, $zero, 4068 + ori $s5, $zero, 4060 b .LBB0_27 .p2align 4, , 16 .LBB0_26: # %pred.store.continue837 # in Loop: Header=BB0_27 Depth=3 - addi.d $s4, $s4, 32 - beqz $s4, .LBB0_43 + addi.d $s8, $s8, 32 + beqz $s8, .LBB0_43 .LBB0_27: # %vector.body814 # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a0, $a7, $s4 - xvldx $xr13, $a0, $t5 - xvld $xr14, $a0, -32 - ori $a5, $t0, 4064 - xvldx $xr15, $a0, $a5 + add.d $a5, $t7, $s8 + xvldx $xr13, $a5, $t5 + xvld $xr14, $a5, -32 + ori $a6, $t0, 4064 + xvldx $xr15, $a5, $a6 xvfsub.s $xr14, $xr14, $xr13 - add.d $a5, $t7, $s4 - xvstx $xr14, $a5, $t5 + add.d $a6, $s0, $s8 + xvstx $xr14, $a6, $t5 xvfsub.s $xr15, $xr15, $xr13 - ori $a5, $zero, 4060 - xvldx $xr16, $a0, $a5 - add.d $a5, $s0, $s4 - xvstx $xr15, $a5, $t5 - xvldx $xr17, $a0, $s5 + xvldx $xr16, $a5, $s5 + add.d $a6, $t6, $s8 + xvstx $xr15, $a6, $t5 + xvldx $xr17, $a5, $s3 xvfsub.s $xr16, $xr16, $xr13 - add.d $a0, $t6, $s4 - xvstx $xr16, $a0, $t5 + add.d $a5, $t8, $s8 + xvstx $xr16, $a5, $t5 xvfsub.s $xr17, $xr17, $xr13 - add.d $a0, $t8, $s4 - xvstx $xr17, $a0, $t5 + add.d $a5, $fp, $s8 + xvstx $xr17, $a5, $t5 xvfmul.s $xr18, $xr14, $xr14 xvfmul.s $xr19, $xr15, $xr15 xvfadd.s $xr18, $xr18, $xr19 @@ -610,398 +621,336 @@ srad_kernel: # @srad_kernel xvfadd.s $xr14, $xr14, $xr16 xvfadd.s $xr14, $xr17, $xr14 xvfdiv.s $xr13, $xr14, $xr13 + vreplvei.w $vr14, $vr18, 3 + fcvt.d.s $ft6, $ft6 + vreplvei.w $vr15, $vr18, 2 + fcvt.d.s $ft7, $ft7 + vextrins.d $vr15, $vr14, 16 + vreplvei.w $vr14, $vr18, 1 + fcvt.d.s $ft6, $ft6 + vreplvei.w $vr16, $vr18, 0 + fcvt.d.s $ft8, $ft8 + vextrins.d $vr16, $vr14, 16 + xvpermi.q $xr16, $xr15, 2 xvpermi.q $xr14, $xr18, 1 vreplvei.w $vr15, $vr14, 3 fcvt.d.s $ft7, $ft7 - vreplvei.w $vr16, $vr14, 2 - fcvt.d.s $ft8, $ft8 - vextrins.d $vr16, $vr15, 16 + vreplvei.w $vr17, $vr14, 2 + fcvt.d.s $ft9, $ft9 + vextrins.d $vr17, $vr15, 16 vreplvei.w $vr15, $vr14, 1 fcvt.d.s $ft7, $ft7 vreplvei.w $vr14, $vr14, 0 fcvt.d.s $ft6, $ft6 vextrins.d $vr14, $vr15, 16 - xvpermi.q $xr14, $xr16, 2 - vreplvei.w $vr15, $vr18, 3 - fcvt.d.s $ft7, $ft7 - vreplvei.w $vr16, $vr18, 2 - fcvt.d.s $ft8, $ft8 - vextrins.d $vr16, $vr15, 16 - vreplvei.w $vr15, $vr18, 1 - fcvt.d.s $ft7, $ft7 - vreplvei.w $vr17, $vr18, 0 - fcvt.d.s $ft9, $ft9 - vextrins.d $vr17, $vr15, 16 - xvpermi.q $xr17, $xr16, 2 + xvpermi.q $xr14, $xr17, 2 xvldi $xr15, -928 - xvfmul.d $xr16, $xr17, $xr15 xvfmul.d $xr14, $xr14, $xr15 - xvfmul.s $xr15, $xr13, $xr13 - xvpermi.q $xr17, $xr15, 1 - vreplvei.w $vr18, $vr17, 3 - fcvt.d.s $ft10, $ft10 - vreplvei.w $vr19, $vr17, 2 - fcvt.d.s $ft11, $ft11 - vextrins.d $vr19, $vr18, 16 - vreplvei.w $vr18, $vr17, 1 - fcvt.d.s $ft10, $ft10 - vreplvei.w $vr17, $vr17, 0 + xvfmul.d $xr15, $xr16, $xr15 + xvfmul.s $xr16, $xr13, $xr13 + vreplvei.w $vr17, $vr16, 3 fcvt.d.s $ft9, $ft9 - vextrins.d $vr17, $vr18, 16 - xvpermi.q $xr17, $xr19, 2 - vreplvei.w $vr18, $vr15, 3 + vreplvei.w $vr18, $vr16, 2 fcvt.d.s $ft10, $ft10 - vreplvei.w $vr19, $vr15, 2 + vextrins.d $vr18, $vr17, 16 + vreplvei.w $vr17, $vr16, 1 + fcvt.d.s $ft9, $ft9 + vreplvei.w $vr19, $vr16, 0 fcvt.d.s $ft11, $ft11 - vextrins.d $vr19, $vr18, 16 - vreplvei.w $vr18, $vr15, 1 + vextrins.d $vr19, $vr17, 16 + xvpermi.q $xr19, $xr18, 2 + xvpermi.q $xr16, $xr16, 1 + vreplvei.w $vr17, $vr16, 3 + fcvt.d.s $ft9, $ft9 + vreplvei.w $vr18, $vr16, 2 fcvt.d.s $ft10, $ft10 - vreplvei.w $vr15, $vr15, 0 - fcvt.d.s $ft7, $ft7 - vextrins.d $vr15, $vr18, 16 - xvpermi.q $xr15, $xr19, 2 - xvreplgr2vr.d $xr18, $t2 - xvfmul.d $xr15, $xr15, $xr18 - xvfmul.d $xr17, $xr17, $xr18 - xvfadd.d $xr17, $xr14, $xr17 - xvfadd.d $xr15, $xr16, $xr15 - xvpickve.d $xr14, $xr15, 1 - fcvt.s.d $ft8, $ft6 - xvpickve.d $xr14, $xr15, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr16, 16 - xvpickve.d $xr16, $xr15, 2 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr14, $vr16, 32 - xvpickve.d $xr15, $xr15, 3 - fcvt.s.d $ft7, $ft7 - vextrins.w $vr14, $vr15, 48 - xvpickve.d $xr15, $xr17, 1 - fcvt.s.d $ft7, $ft7 - xvpickve.d $xr16, $xr17, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr16, $vr15, 16 - xvpickve.d $xr15, $xr17, 2 - fcvt.s.d $ft7, $ft7 - vextrins.w $vr16, $vr15, 32 - xvpickve.d $xr15, $xr17, 3 - fcvt.s.d $ft7, $ft7 - vextrins.w $vr16, $vr15, 48 - xvpermi.q $xr14, $xr16, 2 - xvpermi.q $xr15, $xr13, 1 - vreplvei.w $vr16, $vr15, 3 - fcvt.d.s $ft8, $ft8 - vreplvei.w $vr17, $vr15, 2 + vextrins.d $vr18, $vr17, 16 + vreplvei.w $vr17, $vr16, 1 fcvt.d.s $ft9, $ft9 - vextrins.d $vr17, $vr16, 16 - vreplvei.w $vr16, $vr15, 1 + vreplvei.w $vr16, $vr16, 0 fcvt.d.s $ft8, $ft8 - vreplvei.w $vr15, $vr15, 0 + vextrins.d $vr16, $vr17, 16 + xvpermi.q $xr16, $xr18, 2 + xvreplgr2vr.d $xr17, $a1 + xvfmul.d $xr16, $xr16, $xr17 + xvfmul.d $xr17, $xr19, $xr17 + xvfadd.d $xr15, $xr15, $xr17 + xvfadd.d $xr14, $xr14, $xr16 + xvfcvt.s.d $xr14, $xr14, $xr15 + vreplvei.w $vr15, $vr13, 3 fcvt.d.s $ft7, $ft7 - vextrins.d $vr15, $vr16, 16 - xvpermi.q $xr15, $xr17, 2 - vreplvei.w $vr16, $vr13, 3 + vreplvei.w $vr16, $vr13, 2 fcvt.d.s $ft8, $ft8 - vreplvei.w $vr17, $vr13, 2 + vextrins.d $vr16, $vr15, 16 + vreplvei.w $vr15, $vr13, 1 + fcvt.d.s $ft7, $ft7 + vreplvei.w $vr17, $vr13, 0 fcvt.d.s $ft9, $ft9 - vextrins.d $vr17, $vr16, 16 - vreplvei.w $vr16, $vr13, 1 + vextrins.d $vr17, $vr15, 16 + xvpermi.q $xr17, $xr16, 2 + xvpermi.q $xr13, $xr13, 1 + vreplvei.w $vr15, $vr13, 3 + fcvt.d.s $ft7, $ft7 + vreplvei.w $vr16, $vr13, 2 fcvt.d.s $ft8, $ft8 + vextrins.d $vr16, $vr15, 16 + vreplvei.w $vr15, $vr13, 1 + fcvt.d.s $ft7, $ft7 vreplvei.w $vr13, $vr13, 0 fcvt.d.s $ft5, $ft5 - vextrins.d $vr13, $vr16, 16 - xvpermi.q $xr13, $xr17, 2 - xvldi $xr16, -944 - xvfmul.d $xr17, $xr13, $xr16 - xvfmul.d $xr15, $xr15, $xr16 + vextrins.d $vr13, $vr15, 16 + xvpermi.q $xr13, $xr16, 2 + xvldi $xr15, -944 + xvfmul.d $xr16, $xr13, $xr15 + xvfmul.d $xr15, $xr17, $xr15 xvldi $xr13, -912 xvfadd.d $xr15, $xr15, $xr13 - xvfadd.d $xr16, $xr17, $xr13 - xvpickve.d $xr17, $xr16, 1 - fcvt.s.d $ft9, $ft9 - xvpickve.d $xr18, $xr16, 0 - fcvt.s.d $ft10, $ft10 - vextrins.w $vr18, $vr17, 16 - xvpickve.d $xr17, $xr16, 2 - fcvt.s.d $ft9, $ft9 - vextrins.w $vr18, $vr17, 32 - xvpickve.d $xr16, $xr16, 3 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr18, $vr16, 48 - xvpickve.d $xr16, $xr15, 1 - fcvt.s.d $ft8, $ft8 - xvpickve.d $xr17, $xr15, 0 - fcvt.s.d $ft9, $ft9 - vextrins.w $vr17, $vr16, 16 - xvpickve.d $xr16, $xr15, 2 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr17, $vr16, 32 - xvpickve.d $xr15, $xr15, 3 - fcvt.s.d $ft7, $ft7 - vextrins.w $vr17, $vr15, 48 - xvpermi.q $xr18, $xr17, 2 - xvfmul.s $xr15, $xr18, $xr18 + xvfadd.d $xr16, $xr16, $xr13 + xvld $xr17, $a4, %pc_lo12(.LCPI0_0) + xvfcvt.s.d $xr15, $xr16, $xr15 + xvfmul.s $xr15, $xr15, $xr15 xvfdiv.s $xr14, $xr14, $xr15 + xvperm.w $xr14, $xr14, $xr17 xvfsub.s $xr14, $xr14, $xr11 xvfdiv.s $xr14, $xr14, $xr12 - xvpermi.q $xr15, $xr14, 1 - vreplvei.w $vr16, $vr15, 3 - fcvt.d.s $ft8, $ft8 - vreplvei.w $vr17, $vr15, 2 - fcvt.d.s $ft9, $ft9 - vextrins.d $vr17, $vr16, 16 - vreplvei.w $vr16, $vr15, 1 - fcvt.d.s $ft8, $ft8 - vreplvei.w $vr15, $vr15, 0 + vreplvei.w $vr15, $vr14, 3 fcvt.d.s $ft7, $ft7 - vextrins.d $vr15, $vr16, 16 - xvpermi.q $xr15, $xr17, 2 - vreplvei.w $vr16, $vr14, 3 + vreplvei.w $vr16, $vr14, 2 fcvt.d.s $ft8, $ft8 - vreplvei.w $vr17, $vr14, 2 - fcvt.d.s $ft9, $ft9 - vextrins.d $vr17, $vr16, 16 - vreplvei.w $vr16, $vr14, 1 + vextrins.d $vr16, $vr15, 16 + vreplvei.w $vr15, $vr14, 1 + fcvt.d.s $ft7, $ft7 + vreplvei.w $vr18, $vr14, 0 + fcvt.d.s $ft10, $ft10 + vextrins.d $vr18, $vr15, 16 + xvpermi.q $xr18, $xr16, 2 + xvpermi.q $xr14, $xr14, 1 + vreplvei.w $vr15, $vr14, 3 + fcvt.d.s $ft7, $ft7 + vreplvei.w $vr16, $vr14, 2 fcvt.d.s $ft8, $ft8 + vextrins.d $vr16, $vr15, 16 + vreplvei.w $vr15, $vr14, 1 + fcvt.d.s $ft7, $ft7 vreplvei.w $vr14, $vr14, 0 fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr16, 16 - xvpermi.q $xr14, $xr17, 2 + vextrins.d $vr14, $vr15, 16 + xvpermi.q $xr14, $xr16, 2 xvfadd.d $xr14, $xr14, $xr13 - xvfadd.d $xr13, $xr15, $xr13 + xvfadd.d $xr13, $xr18, $xr13 xvfrecip.d $xr13, $xr13 xvfrecip.d $xr14, $xr14 - xvpickve.d $xr15, $xr14, 1 - fcvt.s.d $ft7, $ft7 - xvpickve.d $xr16, $xr14, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr16, $vr15, 16 - xvpickve.d $xr15, $xr14, 2 - fcvt.s.d $ft7, $ft7 - vextrins.w $vr16, $vr15, 32 - xvpickve.d $xr15, $xr14, 3 - fcvt.s.d $ft7, $ft7 - vextrins.w $vr16, $vr15, 48 - xvpickve.d $xr15, $xr13, 1 - fcvt.s.d $ft7, $ft7 - xvpickve.d $xr17, $xr13, 0 - fcvt.s.d $ft9, $ft9 - vextrins.w $vr17, $vr15, 16 - xvpickve.d $xr15, $xr13, 2 - fcvt.s.d $ft7, $ft7 - vextrins.w $vr17, $vr15, 32 - xvpickve.d $xr15, $xr13, 3 - fcvt.s.d $ft7, $ft7 - vextrins.w $vr17, $vr15, 48 - xvpermi.q $xr16, $xr17, 2 - add.d $s8, $fp, $s4 - ori $a0, $t0, 4068 - xvstx $xr16, $s8, $a0 - xvreplgr2vr.d $xr15, $s3 - xvfcmp.clt.d $xr16, $xr14, $xr15 - xvpickve2gr.d $a5, $xr16, 0 - vinsgr2vr.w $vr17, $a5, 0 - xvpickve2gr.d $a5, $xr16, 1 - vinsgr2vr.w $vr17, $a5, 1 - xvpickve2gr.d $a5, $xr16, 2 - vinsgr2vr.w $vr17, $a5, 2 - xvpickve2gr.d $a5, $xr16, 3 - vinsgr2vr.w $vr17, $a5, 3 - xvfcmp.clt.d $xr15, $xr13, $xr15 - xvpickve2gr.d $a5, $xr15, 0 - vinsgr2vr.w $vr16, $a5, 0 - xvpickve2gr.d $a5, $xr15, 1 - vinsgr2vr.w $vr16, $a5, 1 - xvpickve2gr.d $a5, $xr15, 2 - vinsgr2vr.w $vr16, $a5, 2 - xvpickve2gr.d $a5, $xr15, 3 - vinsgr2vr.w $vr16, $a5, 3 + xvfcvt.s.d $xr15, $xr14, $xr13 + xvperm.w $xr15, $xr15, $xr17 + add.d $t4, $s4, $s8 + ori $a5, $t0, 4068 + xvstx $xr15, $t4, $a5 + xvreplgr2vr.d $xr15, $t2 + xvfcmp.clt.d $xr16, $xr13, $xr15 + xvpickve2gr.d $a6, $xr16, 0 + vinsgr2vr.w $vr17, $a6, 0 + xvpickve2gr.d $a6, $xr16, 1 + vinsgr2vr.w $vr17, $a6, 1 + xvpickve2gr.d $a6, $xr16, 2 + vinsgr2vr.w $vr17, $a6, 2 + xvpickve2gr.d $a6, $xr16, 3 + vinsgr2vr.w $vr17, $a6, 3 + xvfcmp.clt.d $xr15, $xr14, $xr15 + xvpickve2gr.d $a6, $xr15, 0 + vinsgr2vr.w $vr16, $a6, 0 + xvpickve2gr.d $a6, $xr15, 1 + vinsgr2vr.w $vr16, $a6, 1 + xvpickve2gr.d $a6, $xr15, 2 + vinsgr2vr.w $vr16, $a6, 2 + xvpickve2gr.d $a6, $xr15, 3 + vinsgr2vr.w $vr16, $a6, 3 xvpermi.q $xr17, $xr16, 2 xvreplgr2vr.d $xr15, $ra - xvfcmp.clt.d $xr14, $xr15, $xr14 - xvpickve2gr.d $a5, $xr14, 0 - vinsgr2vr.w $vr16, $a5, 0 - xvpickve2gr.d $a5, $xr14, 1 - vinsgr2vr.w $vr16, $a5, 1 - xvpickve2gr.d $a5, $xr14, 2 - vinsgr2vr.w $vr16, $a5, 2 - xvpickve2gr.d $a5, $xr14, 3 - vinsgr2vr.w $vr16, $a5, 3 xvfcmp.clt.d $xr13, $xr15, $xr13 - xvpickve2gr.d $a5, $xr13, 0 - vinsgr2vr.w $vr14, $a5, 0 - xvpickve2gr.d $a5, $xr13, 1 - vinsgr2vr.w $vr14, $a5, 1 - xvpickve2gr.d $a5, $xr13, 2 - vinsgr2vr.w $vr14, $a5, 2 - xvpickve2gr.d $a5, $xr13, 3 - vinsgr2vr.w $vr14, $a5, 3 + xvpickve2gr.d $a6, $xr13, 0 + vinsgr2vr.w $vr16, $a6, 0 + xvpickve2gr.d $a6, $xr13, 1 + vinsgr2vr.w $vr16, $a6, 1 + xvpickve2gr.d $a6, $xr13, 2 + vinsgr2vr.w $vr16, $a6, 2 + xvpickve2gr.d $a6, $xr13, 3 + vinsgr2vr.w $vr16, $a6, 3 + xvfcmp.clt.d $xr13, $xr15, $xr14 + xvpickve2gr.d $a6, $xr13, 0 + vinsgr2vr.w $vr14, $a6, 0 + xvpickve2gr.d $a6, $xr13, 1 + vinsgr2vr.w $vr14, $a6, 1 + xvpickve2gr.d $a6, $xr13, 2 + vinsgr2vr.w $vr14, $a6, 2 + xvpickve2gr.d $a6, $xr13, 3 + vinsgr2vr.w $vr14, $a6, 3 xvpermi.q $xr16, $xr14, 2 xvor.v $xr15, $xr16, $xr17 - xvpickve2gr.w $a5, $xr15, 0 + xvpickve2gr.w $a6, $xr15, 0 xvldi $xr13, -1424 - andi $a6, $a5, 1 + andi $a7, $a6, 1 xvandn.v $xr13, $xr17, $xr13 - beqz $a6, .LBB0_29 + beqz $a7, .LBB0_29 # %bb.28: # %pred.store.if # in Loop: Header=BB0_27 Depth=3 - add.d $a0, $s8, $a0 - xvstelm.w $xr13, $a0, 0, 0 + add.d $a5, $t4, $a5 + xvstelm.w $xr13, $a5, 0, 0 .LBB0_29: # %pred.store.continue # in Loop: Header=BB0_27 Depth=3 - vinsgr2vr.h $vr14, $a5, 0 - xvpickve2gr.w $a0, $xr15, 1 - vinsgr2vr.h $vr14, $a0, 1 - xvpickve2gr.w $a0, $xr15, 2 - vinsgr2vr.h $vr14, $a0, 2 - xvpickve2gr.w $a0, $xr15, 3 - vinsgr2vr.h $vr14, $a0, 3 - xvpickve2gr.w $a0, $xr15, 4 - vinsgr2vr.h $vr14, $a0, 4 - xvpickve2gr.w $a0, $xr15, 5 - vinsgr2vr.h $vr14, $a0, 5 - xvpickve2gr.w $a0, $xr15, 6 - vinsgr2vr.h $vr14, $a0, 6 - xvpickve2gr.w $a0, $xr15, 7 - vinsgr2vr.h $vr14, $a0, 7 - vpickve2gr.h $a0, $vr14, 1 - andi $a0, $a0, 1 - bnez $a0, .LBB0_36 + vinsgr2vr.h $vr14, $a6, 0 + xvpickve2gr.w $a5, $xr15, 1 + vinsgr2vr.h $vr14, $a5, 1 + xvpickve2gr.w $a5, $xr15, 2 + vinsgr2vr.h $vr14, $a5, 2 + xvpickve2gr.w $a5, $xr15, 3 + vinsgr2vr.h $vr14, $a5, 3 + xvpickve2gr.w $a5, $xr15, 4 + vinsgr2vr.h $vr14, $a5, 4 + xvpickve2gr.w $a5, $xr15, 5 + vinsgr2vr.h $vr14, $a5, 5 + xvpickve2gr.w $a5, $xr15, 6 + vinsgr2vr.h $vr14, $a5, 6 + xvpickve2gr.w $a5, $xr15, 7 + vinsgr2vr.h $vr14, $a5, 7 + vpickve2gr.h $a5, $vr14, 1 + andi $a5, $a5, 1 + bnez $a5, .LBB0_36 # %bb.30: # %pred.store.continue825 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a0, $vr14, 2 - andi $a0, $a0, 1 - bnez $a0, .LBB0_37 + vpickve2gr.h $a5, $vr14, 2 + andi $a5, $a5, 1 + bnez $a5, .LBB0_37 .LBB0_31: # %pred.store.continue827 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a0, $vr14, 3 - andi $a0, $a0, 1 - bnez $a0, .LBB0_38 + vpickve2gr.h $a5, $vr14, 3 + andi $a5, $a5, 1 + bnez $a5, .LBB0_38 .LBB0_32: # %pred.store.continue829 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a0, $vr14, 4 - andi $a0, $a0, 1 - bnez $a0, .LBB0_39 + vpickve2gr.h $a5, $vr14, 4 + andi $a5, $a5, 1 + bnez $a5, .LBB0_39 .LBB0_33: # %pred.store.continue831 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a0, $vr14, 5 - andi $a0, $a0, 1 - bnez $a0, .LBB0_40 + vpickve2gr.h $a5, $vr14, 5 + andi $a5, $a5, 1 + bnez $a5, .LBB0_40 .LBB0_34: # %pred.store.continue833 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a0, $vr14, 6 - andi $a0, $a0, 1 - bnez $a0, .LBB0_41 + vpickve2gr.h $a5, $vr14, 6 + andi $a5, $a5, 1 + bnez $a5, .LBB0_41 .LBB0_35: # %pred.store.continue835 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.h $a0, $vr14, 7 - andi $a0, $a0, 1 - beqz $a0, .LBB0_26 + vpickve2gr.h $a5, $vr14, 7 + andi $a5, $a5, 1 + beqz $a5, .LBB0_26 b .LBB0_42 .p2align 4, , 16 .LBB0_36: # %pred.store.if824 # in Loop: Header=BB0_27 Depth=3 - ori $a0, $t0, 4072 - add.d $a0, $s8, $a0 - xvstelm.w $xr13, $a0, 0, 1 - vpickve2gr.h $a0, $vr14, 2 - andi $a0, $a0, 1 - beqz $a0, .LBB0_31 + ori $a5, $t0, 4072 + add.d $a5, $t4, $a5 + xvstelm.w $xr13, $a5, 0, 1 + vpickve2gr.h $a5, $vr14, 2 + andi $a5, $a5, 1 + beqz $a5, .LBB0_31 .LBB0_37: # %pred.store.if826 # in Loop: Header=BB0_27 Depth=3 - ori $a0, $t0, 4076 - add.d $a0, $s8, $a0 - xvstelm.w $xr13, $a0, 0, 2 - vpickve2gr.h $a0, $vr14, 3 - andi $a0, $a0, 1 - beqz $a0, .LBB0_32 + ori $a5, $t0, 4076 + add.d $a5, $t4, $a5 + xvstelm.w $xr13, $a5, 0, 2 + vpickve2gr.h $a5, $vr14, 3 + andi $a5, $a5, 1 + beqz $a5, .LBB0_32 .LBB0_38: # %pred.store.if828 # in Loop: Header=BB0_27 Depth=3 - ori $a0, $t0, 4080 - add.d $a0, $s8, $a0 - xvstelm.w $xr13, $a0, 0, 3 - vpickve2gr.h $a0, $vr14, 4 - andi $a0, $a0, 1 - beqz $a0, .LBB0_33 + ori $a5, $t0, 4080 + add.d $a5, $t4, $a5 + xvstelm.w $xr13, $a5, 0, 3 + vpickve2gr.h $a5, $vr14, 4 + andi $a5, $a5, 1 + beqz $a5, .LBB0_33 .LBB0_39: # %pred.store.if830 # in Loop: Header=BB0_27 Depth=3 - ori $a0, $t0, 4084 - add.d $a0, $s8, $a0 - xvstelm.w $xr13, $a0, 0, 4 - vpickve2gr.h $a0, $vr14, 5 - andi $a0, $a0, 1 - beqz $a0, .LBB0_34 + ori $a5, $t0, 4084 + add.d $a5, $t4, $a5 + xvstelm.w $xr13, $a5, 0, 4 + vpickve2gr.h $a5, $vr14, 5 + andi $a5, $a5, 1 + beqz $a5, .LBB0_34 .LBB0_40: # %pred.store.if832 # in Loop: Header=BB0_27 Depth=3 - add.d $a0, $s8, $a2 - xvstelm.w $xr13, $a0, 0, 5 - vpickve2gr.h $a0, $vr14, 6 - andi $a0, $a0, 1 - beqz $a0, .LBB0_35 + ori $a5, $t0, 4088 + add.d $a5, $t4, $a5 + xvstelm.w $xr13, $a5, 0, 5 + vpickve2gr.h $a5, $vr14, 6 + andi $a5, $a5, 1 + beqz $a5, .LBB0_35 .LBB0_41: # %pred.store.if834 # in Loop: Header=BB0_27 Depth=3 - ori $a0, $t0, 4092 - add.d $a0, $s8, $a0 - xvstelm.w $xr13, $a0, 0, 6 - vpickve2gr.h $a0, $vr14, 7 - andi $a0, $a0, 1 - beqz $a0, .LBB0_26 + ori $a5, $t0, 4092 + add.d $a5, $t4, $a5 + xvstelm.w $xr13, $a5, 0, 6 + vpickve2gr.h $a5, $vr14, 7 + andi $a5, $a5, 1 + beqz $a5, .LBB0_26 .LBB0_42: # %pred.store.if836 # in Loop: Header=BB0_27 Depth=3 - lu12i.w $a0, 2 - add.d $a0, $s8, $a0 - xvstelm.w $xr13, $a0, 0, 7 + lu12i.w $a5, 2 + add.d $a5, $t4, $a5 + xvstelm.w $xr13, $a5, 0, 7 b .LBB0_26 .p2align 4, , 16 .LBB0_43: # in Loop: Header=BB0_24 Depth=2 - ori $a0, $zero, 1017 + ori $a5, $zero, 1017 .LBB0_44: # %scalar.ph810.preheader # in Loop: Header=BB0_24 Depth=2 - slli.d $s4, $a0, 2 + slli.d $t4, $a5, 2 ori $s8, $zero, 4092 - move $a0, $t4 move $a6, $a3 - move $t2, $a4 + move $t2, $a2 move $a5, $s6 move $s3, $s7 move $s5, $s2 + move $a7, $s1 b .LBB0_48 .p2align 4, , 16 .LBB0_45: # in Loop: Header=BB0_48 Depth=3 movgr2fr.w $ft5, $zero .LBB0_46: # %.sink.split669 # in Loop: Header=BB0_48 Depth=3 - fstx.s $ft5, $s5, $s4 + fstx.s $ft5, $a7, $t4 .LBB0_47: # in Loop: Header=BB0_48 Depth=3 addi.d $s8, $s8, -4 + addi.d $a7, $a7, 4 addi.d $s5, $s5, 4 addi.d $s3, $s3, 4 addi.d $a5, $a5, 4 addi.d $t2, $t2, 4 addi.d $a6, $a6, 4 - addi.d $a0, $a0, 4 - beq $s4, $s8, .LBB0_23 + beq $t4, $s8, .LBB0_23 .LBB0_48: # %scalar.ph810 # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $s1, $t2, $s4 - fldx.s $ft5, $t2, $s4 - fldx.s $ft6, $s1, $t3 + add.d $a1, $a5, $t4 + fldx.s $ft5, $a5, $t4 + fldx.s $ft6, $a1, $t3 fsub.s $ft6, $ft6, $ft5 - fstx.s $ft6, $a5, $s4 - fldx.s $ft6, $s1, $t0 + fstx.s $ft6, $s3, $t4 + fldx.s $ft6, $a1, $t0 fsub.s $ft6, $ft6, $ft5 - fstx.s $ft6, $a6, $s4 - fld.s $ft6, $s1, -4 + fstx.s $ft6, $t2, $t4 + fld.s $ft6, $a1, -4 fsub.s $ft6, $ft6, $ft5 - fstx.s $ft6, $a0, $s4 - fld.s $ft6, $s1, 4 + fstx.s $ft6, $a6, $t4 + fld.s $ft6, $a1, 4 fsub.s $ft6, $ft6, $ft5 - fstx.s $ft6, $s3, $s4 - fldx.s $ft7, $a5, $s4 - fldx.s $ft8, $a6, $s4 - fldx.s $ft9, $a0, $s4 + fstx.s $ft6, $s5, $t4 + fldx.s $ft7, $s3, $t4 + fldx.s $ft8, $t2, $t4 + fldx.s $ft9, $a6, $t4 fmul.s $ft10, $ft7, $ft7 fmul.s $ft11, $ft8, $ft8 fadd.s $ft10, $ft10, $ft11 @@ -1035,7 +984,7 @@ srad_kernel: # @srad_kernel frecip.d $ft5, $ft5 fcvt.s.d $ft6, $ft5 fcmp.clt.d $fcc0, $ft5, $fa7 - fstx.s $ft6, $s5, $s4 + fstx.s $ft6, $a7, $t4 bcnez $fcc0, .LBB0_45 # %bb.49: # in Loop: Header=BB0_48 Depth=3 movgr2fr.d $ft6, $ra @@ -1048,164 +997,140 @@ srad_kernel: # @srad_kernel .LBB0_51: # %vector.memcheck698 # in Loop: Header=BB0_1 Depth=1 xvldi $xr9, -960 - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload + ld.d $fp, $sp, 200 # 8-byte Folded Reload + ld.d $s0, $sp, 192 # 8-byte Folded Reload + ld.d $s1, $sp, 184 # 8-byte Folded Reload + ld.d $s2, $sp, 176 # 8-byte Folded Reload ld.d $a0, $sp, 64 # 8-byte Folded Reload + ori $a7, $zero, 4068 beqz $a0, .LBB0_53 # %bb.52: # in Loop: Header=BB0_1 Depth=1 move $a1, $zero - ld.d $a6, $sp, 152 # 8-byte Folded Reload - ld.d $a5, $sp, 144 # 8-byte Folded Reload - ld.d $s5, $sp, 136 # 8-byte Folded Reload - ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $s7, $sp, 120 # 8-byte Folded Reload - ld.d $a0, $sp, 112 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $t7, $sp, 72 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload + ld.d $a6, $sp, 144 # 8-byte Folded Reload + ld.d $a5, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload + ld.d $t8, $sp, 72 # 8-byte Folded Reload b .LBB0_56 .p2align 4, , 16 .LBB0_53: # %vector.body726.preheader # in Loop: Header=BB0_1 Depth=1 - ori $a3, $t3, 32 - ld.d $a6, $sp, 152 # 8-byte Folded Reload - ld.d $a5, $sp, 144 # 8-byte Folded Reload - ld.d $s5, $sp, 136 # 8-byte Folded Reload - ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $s7, $sp, 120 # 8-byte Folded Reload - ld.d $a0, $sp, 112 # 8-byte Folded Reload - ld.d $a4, $sp, 160 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $t7, $sp, 72 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ori $a7, $zero, 4068 + ori $a0, $t3, 32 + ld.d $a6, $sp, 144 # 8-byte Folded Reload + ld.d $a5, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload + ld.d $a3, $sp, 152 # 8-byte Folded Reload + ld.d $t8, $sp, 72 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_54: # %vector.body726 # Parent Loop BB0_1 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a1, $a4, $a3 + add.d $a1, $a3, $a0 xvldx $xr10, $a1, $t5 xvldx $xr11, $a1, $a7 - add.d $a1, $fp, $a3 + add.d $a1, $fp, $a0 xvldx $xr12, $a1, $t5 - add.d $a1, $s0, $a3 + add.d $a1, $s0, $a0 xvldx $xr13, $a1, $t5 - add.d $a1, $s1, $a3 + add.d $a1, $s1, $a0 xvldx $xr14, $a1, $t5 xvfmul.s $xr12, $xr10, $xr12 xvfmul.s $xr13, $xr10, $xr13 xvfadd.s $xr12, $xr12, $xr13 xvfmul.s $xr10, $xr10, $xr14 - add.d $a1, $s2, $a3 + add.d $a1, $s2, $a0 xvldx $xr13, $a1, $t5 - add.d $a1, $t7, $a3 + add.d $a1, $t8, $a0 xvldx $xr14, $a1, $t5 xvfadd.s $xr10, $xr12, $xr10 xvfmul.s $xr11, $xr11, $xr13 - xvfadd.s $xr10, $xr10, $xr11 - vreplvei.w $vr11, $vr14, 3 - fcvt.d.s $ft3, $ft3 - vreplvei.w $vr12, $vr14, 2 + xvfadd.s $xr11, $xr10, $xr11 + xvpermi.q $xr10, $xr14, 1 + vreplvei.w $vr12, $vr10, 3 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr11, 16 - vreplvei.w $vr11, $vr14, 1 - fcvt.d.s $ft5, $ft3 - vreplvei.w $vr11, $vr14, 0 - fcvt.d.s $ft3, $ft3 - vextrins.d $vr11, $vr13, 16 - xvpermi.q $xr11, $xr12, 2 - xvpermi.q $xr12, $xr14, 1 - vreplvei.w $vr13, $vr12, 3 + vreplvei.w $vr13, $vr10, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr12, 2 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr12, 1 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr10, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr12, 16 + xvpermi.q $xr10, $xr13, 2 + vreplvei.w $vr12, $vr14, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr14, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr12, $vr12, 0 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr14, 1 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr13, 16 - xvpermi.q $xr12, $xr14, 2 - xvpermi.q $xr13, $xr10, 1 - vreplvei.w $vr14, $vr13, 3 - fcvt.d.s $ft6, $ft6 - vreplvei.w $vr15, $vr13, 2 - fcvt.d.s $ft7, $ft7 - vextrins.d $vr15, $vr14, 16 - vreplvei.w $vr14, $vr13, 1 + vreplvei.w $vr14, $vr14, 0 fcvt.d.s $ft6, $ft6 - vreplvei.w $vr13, $vr13, 0 + vextrins.d $vr14, $vr12, 16 + xvpermi.q $xr14, $xr13, 2 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 fcvt.d.s $ft5, $ft5 - vextrins.d $vr13, $vr14, 16 - xvpermi.q $xr13, $xr15, 2 - vreplvei.w $vr14, $vr10, 3 - fcvt.d.s $ft6, $ft6 - vreplvei.w $vr15, $vr10, 2 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr15, $vr11, 0 fcvt.d.s $ft7, $ft7 - vextrins.d $vr15, $vr14, 16 - vreplvei.w $vr14, $vr10, 1 - fcvt.d.s $ft6, $ft6 - vreplvei.w $vr10, $vr10, 0 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr14, 16 - xvpermi.q $xr10, $xr15, 2 - xvfmul.d $xr10, $xr10, $xr9 - xvfmul.d $xr13, $xr13, $xr9 - xvfadd.d $xr12, $xr13, $xr12 - xvfadd.d $xr10, $xr10, $xr11 - xvpickve.d $xr11, $xr10, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr11, 16 - xvpickve.d $xr11, $xr10, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 32 - xvpickve.d $xr10, $xr10, 3 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr13, $vr10, 48 - xvpickve.d $xr10, $xr12, 1 - fcvt.s.d $ft2, $ft2 - xvpickve.d $xr11, $xr12, 0 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr11, $vr10, 16 - xvpickve.d $xr10, $xr12, 2 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr11, $vr10, 32 - xvpickve.d $xr10, $xr12, 3 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr11, $vr10, 48 - xvpermi.q $xr13, $xr11, 2 - addi.d $a3, $a3, 32 - xvstx $xr13, $a1, $t5 - bnez $a3, .LBB0_54 + vextrins.d $vr15, $vr12, 16 + xvpermi.q $xr15, $xr13, 2 + xvpermi.q $xr11, $xr11, 1 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr11, $vr11, 0 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfmul.d $xr11, $xr11, $xr9 + xvfmul.d $xr12, $xr15, $xr9 + xvld $xr13, $a4, %pc_lo12(.LCPI0_0) + xvfadd.d $xr12, $xr12, $xr14 + xvfadd.d $xr10, $xr11, $xr10 + xvfcvt.s.d $xr10, $xr10, $xr12 + xvperm.w $xr10, $xr10, $xr13 + addi.d $a0, $a0, 32 + xvstx $xr10, $a1, $t5 + bnez $a0, .LBB0_54 # %bb.55: # in Loop: Header=BB0_1 Depth=1 ori $a1, $zero, 1016 .LBB0_56: # %.preheader623.preheader # in Loop: Header=BB0_1 Depth=1 - slli.d $t8, $a1, 2 + slli.d $a0, $a1, 2 addi.d $a1, $a1, -1023 move $a3, $s2 - move $a4, $s1 + move $t7, $s1 move $t6, $s0 - move $t4, $fp - ld.d $a7, $sp, 56 # 8-byte Folded Reload + move $a7, $fp + ld.d $t2, $sp, 56 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_57: # %.preheader623 # Parent Loop BB0_1 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t2, $a7, $t8 - fld.s $ft2, $t2, -4 - fldx.s $ft3, $t4, $t8 - fldx.s $ft4, $t6, $t8 - fldx.s $ft5, $a7, $t8 + add.d $t4, $t2, $a0 + fld.s $ft2, $t4, -4 + fldx.s $ft3, $a7, $a0 + fldx.s $ft4, $t6, $a0 + fldx.s $ft5, $t2, $a0 fmul.s $ft3, $ft2, $ft3 - fldx.s $ft6, $a4, $t8 + fldx.s $ft6, $t7, $a0 fmul.s $ft4, $ft2, $ft4 fadd.s $ft3, $ft3, $ft4 - fldx.s $ft4, $a3, $t8 + fldx.s $ft4, $a3, $a0 fmul.s $ft2, $ft2, $ft6 - fldx.s $ft6, $t7, $t8 + fldx.s $ft6, $t8, $a0 fadd.s $ft2, $ft3, $ft2 fmul.s $ft3, $ft5, $ft4 fadd.s $ft2, $ft2, $ft3 @@ -1214,29 +1139,29 @@ srad_kernel: # @srad_kernel fmul.d $ft2, $ft2, $ft0 fadd.d $ft2, $ft2, $ft3 fcvt.s.d $ft2, $ft2 - fstx.s $ft2, $t7, $t8 + fstx.s $ft2, $t8, $a0 + addi.d $t2, $t2, 4 addi.d $a7, $a7, 4 - addi.d $t4, $t4, 4 addi.d $t6, $t6, 4 - addi.d $a4, $a4, 4 + addi.d $t7, $t7, 4 addi.d $a3, $a3, 4 addi.d $a1, $a1, 1 - addi.d $t7, $t7, 4 + addi.d $t8, $t8, 4 bnez $a1, .LBB0_57 # %bb.58: # in Loop: Header=BB0_1 Depth=1 - ld.d $a1, $sp, 216 # 8-byte Folded Reload - fld.s $ft2, $a1, 0 + ld.d $a0, $sp, 208 # 8-byte Folded Reload + fld.s $ft2, $a0, 0 ori $a1, $zero, 4092 fldx.s $ft3, $fp, $a1 fldx.s $ft4, $s0, $a1 - move $s8, $zero + move $a0, $zero fmul.s $ft3, $ft2, $ft3 fldx.s $ft5, $s1, $a1 fmul.s $ft4, $ft2, $ft4 fadd.s $ft3, $ft3, $ft4 fldx.s $ft4, $s2, $a1 fmul.s $ft5, $ft2, $ft5 - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 216 # 8-byte Folded Reload ori $a3, $t0, 4092 fldx.s $ft6, $a1, $a3 fadd.s $ft3, $ft3, $ft5 @@ -1249,12 +1174,12 @@ srad_kernel: # @srad_kernel fcvt.s.d $ft2, $ft2 fstx.s $ft2, $a1, $a3 move $a1, $a6 - move $s6, $a0 - move $a4, $s7 + ld.d $s7, $sp, 224 # 8-byte Folded Reload + move $s5, $s8 move $s4, $a2 - move $s3, $s5 - move $a7, $a5 - ld.d $t2, $sp, 176 # 8-byte Folded Reload + move $a7, $s6 + move $t2, $a5 + ld.d $t6, $sp, 168 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_59: # %.preheader # Parent Loop BB0_1 Depth=1 @@ -1269,139 +1194,119 @@ srad_kernel: # @srad_kernel .p2align 4, , 16 .LBB0_61: # %vector.body.preheader # in Loop: Header=BB0_59 Depth=2 - ori $t6, $t3, 32 + ori $t4, $t3, 32 .p2align 4, , 16 .LBB0_62: # %vector.body # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_59 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a3, $t2, $t6 + add.d $a3, $t6, $t4 xvld $xr10, $a3, -32 xvldx $xr11, $a3, $t5 xvld $xr12, $a3, -28 - add.d $a3, $s6, $t6 + add.d $a3, $s7, $t4 xvldx $xr13, $a3, $t5 - add.d $a3, $a4, $t6 + add.d $a3, $s5, $t4 xvldx $xr14, $a3, $t5 - add.d $a3, $s4, $t6 + add.d $a3, $s4, $t4 xvldx $xr15, $a3, $t5 xvfmul.s $xr13, $xr10, $xr13 xvfmul.s $xr11, $xr11, $xr14 xvfadd.s $xr11, $xr13, $xr11 xvfmul.s $xr10, $xr10, $xr15 - add.d $a3, $s3, $t6 + add.d $a3, $a7, $t4 xvldx $xr13, $a3, $t5 - add.d $t7, $a7, $t6 + add.d $t7, $t2, $t4 xvldx $xr14, $t7, $t5 xvfadd.s $xr10, $xr11, $xr10 xvfmul.s $xr11, $xr12, $xr13 - xvfadd.s $xr10, $xr10, $xr11 - vreplvei.w $vr11, $vr14, 3 - fcvt.d.s $ft3, $ft3 - vreplvei.w $vr12, $vr14, 2 + xvfadd.s $xr11, $xr10, $xr11 + xvpermi.q $xr10, $xr14, 1 + vreplvei.w $vr12, $vr10, 3 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr11, 16 - vreplvei.w $vr11, $vr14, 1 - fcvt.d.s $ft5, $ft3 - vreplvei.w $vr11, $vr14, 0 - fcvt.d.s $ft3, $ft3 - vextrins.d $vr11, $vr13, 16 - xvpermi.q $xr11, $xr12, 2 - xvpermi.q $xr12, $xr14, 1 - vreplvei.w $vr13, $vr12, 3 + vreplvei.w $vr13, $vr10, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr12, 2 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr12, 1 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr10, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr12, 16 + xvpermi.q $xr10, $xr13, 2 + vreplvei.w $vr12, $vr14, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr14, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr12, $vr12, 0 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr14, 1 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr13, 16 - xvpermi.q $xr12, $xr14, 2 - xvpermi.q $xr13, $xr10, 1 - vreplvei.w $vr14, $vr13, 3 - fcvt.d.s $ft6, $ft6 - vreplvei.w $vr15, $vr13, 2 - fcvt.d.s $ft7, $ft7 - vextrins.d $vr15, $vr14, 16 - vreplvei.w $vr14, $vr13, 1 + vreplvei.w $vr14, $vr14, 0 fcvt.d.s $ft6, $ft6 - vreplvei.w $vr13, $vr13, 0 + vextrins.d $vr14, $vr12, 16 + xvpermi.q $xr14, $xr13, 2 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 fcvt.d.s $ft5, $ft5 - vextrins.d $vr13, $vr14, 16 - xvpermi.q $xr13, $xr15, 2 - vreplvei.w $vr14, $vr10, 3 - fcvt.d.s $ft6, $ft6 - vreplvei.w $vr15, $vr10, 2 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr15, $vr11, 0 fcvt.d.s $ft7, $ft7 - vextrins.d $vr15, $vr14, 16 - vreplvei.w $vr14, $vr10, 1 - fcvt.d.s $ft6, $ft6 - vreplvei.w $vr10, $vr10, 0 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr14, 16 - xvpermi.q $xr10, $xr15, 2 - xvfmul.d $xr10, $xr10, $xr9 - xvfmul.d $xr13, $xr13, $xr9 - xvfadd.d $xr12, $xr13, $xr12 - xvfadd.d $xr10, $xr10, $xr11 - xvpickve.d $xr11, $xr10, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr11, 16 - xvpickve.d $xr11, $xr10, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 32 - xvpickve.d $xr10, $xr10, 3 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr13, $vr10, 48 - xvpickve.d $xr10, $xr12, 1 - fcvt.s.d $ft2, $ft2 - xvpickve.d $xr11, $xr12, 0 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr11, $vr10, 16 - xvpickve.d $xr10, $xr12, 2 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr11, $vr10, 32 - xvpickve.d $xr10, $xr12, 3 - fcvt.s.d $ft2, $ft2 - vextrins.w $vr11, $vr10, 48 - xvpermi.q $xr13, $xr11, 2 - addi.d $t6, $t6, 32 - xvstx $xr13, $t7, $t5 - bnez $t6, .LBB0_62 + vextrins.d $vr15, $vr12, 16 + xvpermi.q $xr15, $xr13, 2 + xvpermi.q $xr11, $xr11, 1 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr11, $vr11, 0 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfmul.d $xr11, $xr11, $xr9 + xvfmul.d $xr12, $xr15, $xr9 + xvld $xr13, $a4, %pc_lo12(.LCPI0_0) + xvfadd.d $xr12, $xr12, $xr14 + xvfadd.d $xr10, $xr11, $xr10 + xvfcvt.s.d $xr10, $xr10, $xr12 + xvperm.w $xr10, $xr10, $xr13 + addi.d $t4, $t4, 32 + xvstx $xr10, $t7, $t5 + bnez $t4, .LBB0_62 # %bb.63: # in Loop: Header=BB0_59 Depth=2 ori $t4, $zero, 1016 .LBB0_64: # %scalar.ph.preheader # in Loop: Header=BB0_59 Depth=2 slli.d $a3, $t4, 2 addi.d $t4, $t4, -1023 - move $t6, $a7 - move $t7, $s3 - move $t8, $s4 - move $fp, $a4 - move $s0, $s6 + move $t7, $t2 + move $t8, $a7 + move $fp, $s4 + move $s0, $s5 + move $s1, $s7 move $s2, $a1 .p2align 4, , 16 .LBB0_65: # %scalar.ph # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_59 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $s1, $s2, $a3 - fldx.s $ft2, $s1, $t0 - fldx.s $ft3, $fp, $a3 + add.d $s3, $s2, $a3 + fldx.s $ft2, $s3, $t0 + fldx.s $ft3, $s0, $a3 fldx.s $ft4, $s2, $a3 - fldx.s $ft5, $s0, $a3 - fld.s $ft6, $s1, 4 + fldx.s $ft5, $s1, $a3 + fld.s $ft6, $s3, 4 fmul.s $ft2, $ft2, $ft3 - fldx.s $ft3, $t8, $a3 + fldx.s $ft3, $fp, $a3 fmul.s $ft5, $ft4, $ft5 fadd.s $ft2, $ft5, $ft2 - fldx.s $ft5, $t7, $a3 + fldx.s $ft5, $t8, $a3 fmul.s $ft3, $ft4, $ft3 - fldx.s $ft4, $t6, $a3 + fldx.s $ft4, $t7, $a3 fadd.s $ft2, $ft2, $ft3 fmul.s $ft3, $ft6, $ft5 fadd.s $ft2, $ft2, $ft3 @@ -1410,38 +1315,37 @@ srad_kernel: # @srad_kernel fmul.d $ft2, $ft2, $ft0 fadd.d $ft2, $ft2, $ft3 fcvt.s.d $ft2, $ft2 - fstx.s $ft2, $t6, $a3 + fstx.s $ft2, $t7, $a3 addi.d $s2, $s2, 4 + addi.d $s1, $s1, 4 addi.d $s0, $s0, 4 addi.d $fp, $fp, 4 addi.d $t8, $t8, 4 - addi.d $t7, $t7, 4 addi.d $t4, $t4, 1 - addi.d $t6, $t6, 4 + addi.d $t7, $t7, 4 bnez $t4, .LBB0_65 # %bb.66: # in Loop: Header=BB0_59 Depth=2 - addi.d $s8, $s8, 1 + addi.d $a0, $a0, 1 + add.d $t6, $t6, $t0 add.d $t2, $t2, $t0 add.d $a7, $a7, $t0 - add.d $s3, $s3, $t0 add.d $s4, $s4, $t0 - add.d $a4, $a4, $t0 - add.d $s6, $s6, $t0 + add.d $s5, $s5, $t0 + add.d $s7, $s7, $t0 add.d $a1, $a1, $t0 ori $a3, $zero, 2047 - bne $s8, $a3, .LBB0_59 + bne $a0, $a3, .LBB0_59 # %bb.67: # in Loop: Header=BB0_1 Depth=1 - ld.d $s1, $sp, 168 # 8-byte Folded Reload + ld.d $s1, $sp, 160 # 8-byte Folded Reload addi.w $s1, $s1, 1 - ld.d $t6, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 200 # 8-byte Folded Reload - ld.d $s6, $sp, 192 # 8-byte Folded Reload - ld.d $s4, $sp, 184 # 8-byte Folded Reload - lu52i.d $t2, $zero, -1029 - ori $a7, $zero, 128 - ori $t4, $zero, 4092 - ori $a1, $zero, 10 - bne $s1, $a1, .LBB0_1 + ld.d $t6, $sp, 200 # 8-byte Folded Reload + ld.d $s2, $sp, 192 # 8-byte Folded Reload + ld.d $s5, $sp, 184 # 8-byte Folded Reload + ld.d $s4, $sp, 176 # 8-byte Folded Reload + ori $a4, $zero, 128 + ori $a7, $zero, 4092 + ori $a0, $zero, 10 + bne $s1, $a0, .LBB0_1 # %bb.68: ld.d $s8, $sp, 248 # 8-byte Folded Reload ld.d $s7, $sp, 256 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/TSVC/ControlFlow-flt/CMakeFiles/ControlFlow-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/ControlFlow-flt/CMakeFiles/ControlFlow-flt.dir/tsc.s index 0a305b7..48508ab 100644 --- a/results/MultiSource/Benchmarks/TSVC/ControlFlow-flt/CMakeFiles/ControlFlow-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/ControlFlow-flt/CMakeFiles/ControlFlow-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a1, 3328 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_657 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/ControlLoops-flt/CMakeFiles/ControlLoops-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/ControlLoops-flt/CMakeFiles/ControlLoops-flt.dir/tsc.s index a4f9159..1409a0d 100644 --- a/results/MultiSource/Benchmarks/TSVC/ControlLoops-flt/CMakeFiles/ControlLoops-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/ControlLoops-flt/CMakeFiles/ControlLoops-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_646 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s index e104600..0e5e6bc 100644 --- a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a1, 3328 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_657 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/Equivalencing-flt/CMakeFiles/Equivalencing-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Equivalencing-flt/CMakeFiles/Equivalencing-flt.dir/tsc.s index 5be9514..82ec708 100644 --- a/results/MultiSource/Benchmarks/TSVC/Equivalencing-flt/CMakeFiles/Equivalencing-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Equivalencing-flt/CMakeFiles/Equivalencing-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 +.LBB5_321: # %vector.body5301 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a1, 3328 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s index fc9741c..4c8d0f9 100644 --- a/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Expansion-flt/CMakeFiles/Expansion-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/CMakeFiles/GlobalDataFlow-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/CMakeFiles/GlobalDataFlow-flt.dir/tsc.s index 1f7923e..1d49eb1 100644 --- a/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/CMakeFiles/GlobalDataFlow-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/GlobalDataFlow-flt/CMakeFiles/GlobalDataFlow-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/CMakeFiles/IndirectAddressing-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/CMakeFiles/IndirectAddressing-flt.dir/tsc.s index 671952c..6e6c27a 100644 --- a/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/CMakeFiles/IndirectAddressing-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/IndirectAddressing-flt/CMakeFiles/IndirectAddressing-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_646 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/InductionVariable-flt/CMakeFiles/InductionVariable-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/InductionVariable-flt/CMakeFiles/InductionVariable-flt.dir/tsc.s index 394737a..3edf2ad 100644 --- a/results/MultiSource/Benchmarks/TSVC/InductionVariable-flt/CMakeFiles/InductionVariable-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/InductionVariable-flt/CMakeFiles/InductionVariable-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a1, 3328 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_657 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/LinearDependence-flt/CMakeFiles/LinearDependence-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LinearDependence-flt/CMakeFiles/LinearDependence-flt.dir/tsc.s index a9f92a9..d111648 100644 --- a/results/MultiSource/Benchmarks/TSVC/LinearDependence-flt/CMakeFiles/LinearDependence-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LinearDependence-flt/CMakeFiles/LinearDependence-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/CMakeFiles/LoopRerolling-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/CMakeFiles/LoopRerolling-flt.dir/tsc.s index 894e972..a277a82 100644 --- a/results/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/CMakeFiles/LoopRerolling-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/CMakeFiles/LoopRerolling-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_646 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/CMakeFiles/LoopRestructuring-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/CMakeFiles/LoopRestructuring-flt.dir/tsc.s index 56e6aa5..8ecc10a 100644 --- a/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/CMakeFiles/LoopRestructuring-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/LoopRestructuring-flt/CMakeFiles/LoopRestructuring-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a1, 3328 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_767 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/NodeSplitting-flt/CMakeFiles/NodeSplitting-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/NodeSplitting-flt/CMakeFiles/NodeSplitting-flt.dir/tsc.s index 9e60b3b..e1876d2 100644 --- a/results/MultiSource/Benchmarks/TSVC/NodeSplitting-flt/CMakeFiles/NodeSplitting-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/NodeSplitting-flt/CMakeFiles/NodeSplitting-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/Packing-flt/CMakeFiles/Packing-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Packing-flt/CMakeFiles/Packing-flt.dir/tsc.s index 620f08d..a9341d2 100644 --- a/results/MultiSource/Benchmarks/TSVC/Packing-flt/CMakeFiles/Packing-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Packing-flt/CMakeFiles/Packing-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_646 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/Recurrences-flt/CMakeFiles/Recurrences-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Recurrences-flt/CMakeFiles/Recurrences-flt.dir/tsc.s index 1f17689..440086b 100644 --- a/results/MultiSource/Benchmarks/TSVC/Recurrences-flt/CMakeFiles/Recurrences-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Recurrences-flt/CMakeFiles/Recurrences-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_646 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s index af37dd9..2fcc8e3 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_646 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/Searching-flt/CMakeFiles/Searching-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Searching-flt/CMakeFiles/Searching-flt.dir/tsc.s index aa84364..381fcaf 100644 --- a/results/MultiSource/Benchmarks/TSVC/Searching-flt/CMakeFiles/Searching-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Searching-flt/CMakeFiles/Searching-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_646 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/StatementReordering-flt/CMakeFiles/StatementReordering-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/StatementReordering-flt/CMakeFiles/StatementReordering-flt.dir/tsc.s index 8a3e75f..954dc4a 100644 --- a/results/MultiSource/Benchmarks/TSVC/StatementReordering-flt/CMakeFiles/StatementReordering-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/StatementReordering-flt/CMakeFiles/StatementReordering-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_646 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/TSVC/Symbolics-flt/CMakeFiles/Symbolics-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Symbolics-flt/CMakeFiles/Symbolics-flt.dir/tsc.s index 77b278b..53bdec4 100644 --- a/results/MultiSource/Benchmarks/TSVC/Symbolics-flt/CMakeFiles/Symbolics-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Symbolics-flt/CMakeFiles/Symbolics-flt.dir/tsc.s @@ -11,6 +11,11 @@ .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1d .p2align 5 @@ -63,30 +68,29 @@ set1d: # @set1d bnez $a1, .LBB0_3 b .LBB0_8 .LBB0_4: # %vector.body40.preheader - addi.d $a0, $a0, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr0, $a1, %pc_lo12(.LCPI0_2) + addi.d $a0, $a0, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB0_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB0_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB0_5 b .LBB0_8 .LBB0_6: # %.preheader.preheader move $a2, $zero @@ -118,6 +122,11 @@ set1d: # @set1d .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI1_2: + .dword 0 # 0x0 + .dword 1 # 0x1 .text .globl set1ds .p2align 5 @@ -170,30 +179,29 @@ set1ds: # @set1ds bnez $a0, .LBB1_3 b .LBB1_8 .LBB1_4: # %vector.body40.preheader - addi.d $a0, $a1, 4 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) + addi.d $a0, $a1, 8 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 .p2align 4, , 16 .LBB1_5: # %vector.body40 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB1_5 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB1_5 b .LBB1_8 .LBB1_6: # %.preheader.preheader move $a0, $zero @@ -685,14 +693,19 @@ check: # @check .word .LBB4_14-.LJTI4_0 .word .LBB4_17-.LJTI4_0 # -- End function - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 # -- Begin function init + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 # -- Begin function init .LCPI5_0: + .dword 0 # 0x0 + .dword 1 # 0x1 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 +.LCPI5_1: .dword 4 # 0x4 .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI5_1: +.LCPI5_2: .dword 0 # 0x0 .dword 1 # 0x1 .dword 2 # 0x2 @@ -703,11 +716,12 @@ check: # @check .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -80 - st.d $ra, $sp, 72 # 8-byte Folded Spill - st.d $fp, $sp, 64 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill - st.d $s1, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -96 + st.d $ra, $sp, 88 # 8-byte Folded Spill + st.d $fp, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s2, $sp, 56 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a1, $a0, %pc_lo12(.L.str.1) @@ -1661,131 +1675,122 @@ init: # @init jirl $ra, $ra, 0 bnez $a0, .LBB5_573 # %bb.136: # %vector.body.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 + addi.d $a3, $a0, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 .LBB5_137: # %vector.body # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_137 # %bb.138: # %vector.body3598.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 + lu12i.w $a3, 31 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 .LBB5_139: # %vector.body3598 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_139 -# %bb.140: # %vector.body3603.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_141: # %vector.body3603 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.140: # %vector.body3606.preheader + lu12i.w $a3, 62 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_141: # %vector.body3606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_141 -# %bb.142: # %vector.body3608.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_143: # %vector.body3608 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.142: # %vector.body3614.preheader + lu12i.w $a3, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 + ori $a4, $a2, 3328 +.LBB5_143: # %vector.body3614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_143 -# %bb.144: # %vector.body3613.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_145: # %vector.body3613 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB5_145 +# %bb.144: # %vector.body3622.preheader + lu12i.w $a3, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a3, 136 + add.d $a1, $a0, $a1 + ori $a2, $a2, 3328 +.LBB5_145: # %vector.body3622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_145 # %bb.146: # %.preheader34.i2938.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -1837,7 +1842,7 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_147 b .LBB5_573 -.LBB5_148: # %vector.body5445.preheader +.LBB5_148: # %vector.body5846.preheader pcalau12i $a0, %pc_hi20(X+4) addi.d $a0, $a0, %pc_lo12(X+4) pcalau12i $a1, %pc_hi20(V+4) @@ -1852,7 +1857,7 @@ init: # @init lu12i.w $a6, 7 ori $a6, $a6, 3328 .p2align 4, , 16 -.LBB5_149: # %vector.body5445 +.LBB5_149: # %vector.body5846 # =>This Inner Loop Header: Depth=1 bstrpick.d $a7, $a5, 31, 0 addi.d $t0, $a5, -5 @@ -1896,7 +1901,7 @@ init: # @init addi.d $a4, $a4, 8 bnez $a6, .LBB5_149 b .LBB5_573 -.LBB5_150: # %vector.body5412.preheader +.LBB5_150: # %vector.body5813.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -1904,24 +1909,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_151: # %vector.body5412 +.LBB5_151: # %vector.body5813 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_151 -# %bb.152: # %vector.body5417.preheader +# %bb.152: # %vector.body5818.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_153: # %vector.body5417 +.LBB5_153: # %vector.body5818 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1952,15 +1957,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_153 -# %bb.154: # %vector.body5424.preheader +# %bb.154: # %vector.body5825.preheader lu12i.w $a4, 62 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 2096 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_155: # %vector.body5424 +.LBB5_155: # %vector.body5825 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -1991,15 +1996,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_155 -# %bb.156: # %vector.body5431.preheader +# %bb.156: # %vector.body5832.preheader lu12i.w $a4, 93 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 3152 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 .p2align 4, , 16 -.LBB5_157: # %vector.body5431 +.LBB5_157: # %vector.body5832 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2030,15 +2035,15 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_157 -# %bb.158: # %vector.body5438.preheader +# %bb.158: # %vector.body5839.preheader lu12i.w $a4, 125 - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_159: # %vector.body5438 +.LBB5_159: # %vector.body5839 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2070,7 +2075,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_159 b .LBB5_573 -.LBB5_160: # %vector.body5400.preheader +.LBB5_160: # %vector.body5801.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2078,24 +2083,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_161: # %vector.body5400 +.LBB5_161: # %vector.body5801 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_161 -# %bb.162: # %vector.body5405.preheader +# %bb.162: # %vector.body5806.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_163: # %vector.body5405 +.LBB5_163: # %vector.body5806 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2127,7 +2132,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_163 b .LBB5_573 -.LBB5_164: # %vector.body5388.preheader +.LBB5_164: # %vector.body5789.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -2135,24 +2140,24 @@ init: # @init ori $a3, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_165: # %vector.body5388 +.LBB5_165: # %vector.body5789 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_165 -# %bb.166: # %vector.body5393.preheader +# %bb.166: # %vector.body5794.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 .p2align 4, , 16 -.LBB5_167: # %vector.body5393 +.LBB5_167: # %vector.body5794 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2288,7 +2293,7 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_171 b .LBB5_573 -.LBB5_172: # %vector.body5354.preheader +.LBB5_172: # %vector.body5755.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2296,7 +2301,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_173: # %vector.body5354 +.LBB5_173: # %vector.body5755 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2436,14 +2441,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_179 b .LBB5_573 -.LBB5_180: # %vector.body5349.preheader +.LBB5_180: # %vector.body5750.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_181: # %vector.body5349 +.LBB5_181: # %vector.body5750 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -2451,7 +2456,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_181 b .LBB5_573 -.LBB5_182: # %vector.body5339.preheader +.LBB5_182: # %vector.body5740.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 @@ -2459,7 +2464,7 @@ init: # @init ori $a2, $a2, 3328 xvldi $xr0, -1424 .p2align 4, , 16 -.LBB5_183: # %vector.body5339 +.LBB5_183: # %vector.body5740 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -2610,30 +2615,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_189 b .LBB5_573 -.LBB5_190: # %vector.body5315.preheader +.LBB5_190: # %vector.body5716.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_191: # %vector.body5315 +.LBB5_191: # %vector.body5716 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_191 -# %bb.192: # %vector.body5320.preheader +# %bb.192: # %vector.body5721.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_193: # %vector.body5320 +.LBB5_193: # %vector.body5721 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2665,30 +2670,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_193 b .LBB5_573 -.LBB5_194: # %vector.body5303.preheader +.LBB5_194: # %vector.body5704.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_195: # %vector.body5303 +.LBB5_195: # %vector.body5704 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_195 -# %bb.196: # %vector.body5308.preheader +# %bb.196: # %vector.body5709.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_197: # %vector.body5308 +.LBB5_197: # %vector.body5709 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -2734,74 +2739,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_199: # %vector.body5283 +.LBB5_199: # %vector.body5678 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_199 -# %bb.200: # %vector.body5288.preheader +# %bb.200: # %vector.body5683.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_201: # %vector.body5288 +.LBB5_201: # %vector.body5683 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_201 -# %bb.202: # %vector.body5293.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.202: # %vector.body5688.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_203: # %vector.body5293 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_203: # %vector.body5688 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_203 -# %bb.204: # %vector.body5298.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.204: # %vector.body5696.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_205: # %vector.body5298 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_205: # %vector.body5696 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_205 b .LBB5_573 .LBB5_206: # %.preheader.i1195.preheader @@ -2818,74 +2820,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_207: # %vector.body5263 +.LBB5_207: # %vector.body5652 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_207 -# %bb.208: # %vector.body5268.preheader +# %bb.208: # %vector.body5657.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_209: # %vector.body5268 +.LBB5_209: # %vector.body5657 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_209 -# %bb.210: # %vector.body5273.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.210: # %vector.body5662.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_211: # %vector.body5273 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_211: # %vector.body5662 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_211 -# %bb.212: # %vector.body5278.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.212: # %vector.body5670.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_213: # %vector.body5278 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_213: # %vector.body5670 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_213 b .LBB5_573 .LBB5_214: # %.preheader.i1217.preheader @@ -3072,31 +3071,30 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_221 -# %bb.222: # %vector.body5236.preheader - pcalau12i $a1, %pc_hi20(array+4) - addi.d $a1, $a1, %pc_lo12(array+4) - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_223: # %vector.body5236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_223 +# %bb.222: # %vector.body5622.preheader + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(array+8) + addi.d $a1, $a1, %pc_lo12(array+8) + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_223: # %vector.body5622 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_223 # %bb.224: # %.preheader34.i1253.preheader lu12i.w $a1, 284 ori $a1, $a1, 1888 @@ -3162,87 +3160,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_227: # %vector.body5211 +.LBB5_227: # %vector.body5588 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_227 -# %bb.228: # %vector.body5216.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.228: # %vector.body5593.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_229: # %vector.body5216 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_229: # %vector.body5593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_229 -# %bb.230: # %vector.body5221.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.230: # %vector.body5601.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_231: # %vector.body5221 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_231: # %vector.body5601 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_231 -# %bb.232: # %vector.body5226.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.232: # %vector.body5609.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_233: # %vector.body5226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_233: # %vector.body5609 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_233 b .LBB5_573 .LBB5_234: # %.preheader.i1284.preheader @@ -3259,32 +3252,32 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -3264 -.LBB5_235: # %vector.body5196 +.LBB5_235: # %vector.body5573 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_235 -# %bb.236: # %vector.body5201.preheader +# %bb.236: # %vector.body5578.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_237: # %vector.body5201 +.LBB5_237: # %vector.body5578 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_237 -# %bb.238: # %vector.body5206.preheader +# %bb.238: # %vector.body5583.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_239: # %vector.body5206 +.LBB5_239: # %vector.body5583 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -3292,30 +3285,30 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_239 b .LBB5_573 -.LBB5_240: # %vector.body5184.preheader +.LBB5_240: # %vector.body5561.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_241: # %vector.body5184 +.LBB5_241: # %vector.body5561 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_241 -# %bb.242: # %vector.body5189.preheader +# %bb.242: # %vector.body5566.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_243: # %vector.body5189 +.LBB5_243: # %vector.body5566 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3392,65 +3385,62 @@ init: # @init addi.d $a2, $a2, -1 addi.d $a1, $a1, 1024 bnez $a2, .LBB5_245 -# %bb.246: # %vector.body5174.preheader +# %bb.246: # %vector.body5545.preheader lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_247: # %vector.body5174 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_247: # %vector.body5545 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_247 -# %bb.248: # %vector.body5179.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.248: # %vector.body5553.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_249: # %vector.body5179 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_249: # %vector.body5553 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_249 b .LBB5_573 -.LBB5_250: # %vector.body5157.preheader +.LBB5_250: # %vector.body5528.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_251: # %vector.body5157 +.LBB5_251: # %vector.body5528 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3506,30 +3496,30 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_253 b .LBB5_573 -.LBB5_254: # %vector.body5145.preheader +.LBB5_254: # %vector.body5516.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_255: # %vector.body5145 +.LBB5_255: # %vector.body5516 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_255 -# %bb.256: # %vector.body5150.preheader +# %bb.256: # %vector.body5521.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_257: # %vector.body5150 +.LBB5_257: # %vector.body5521 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3561,14 +3551,14 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_257 b .LBB5_573 -.LBB5_258: # %vector.body5125.preheader +.LBB5_258: # %vector.body5487.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 lu12i.w $s0, 7 ori $a1, $s0, 3328 xvldi $xr0, -1424 -.LBB5_259: # %vector.body5125 +.LBB5_259: # %vector.body5487 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -3583,274 +3573,263 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 + lu12i.w $a1, 62 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 2104 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_261: # %vector.body5130 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_261: # %vector.body5492 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_261 -# %bb.262: # %vector.body5135.preheader - lu12i.w $a0, 93 - ori $a0, $a0, 3156 - add.d $a0, $fp, $a0 - ori $a1, $zero, 2 +# %bb.262: # %vector.body5500.preheader + lu12i.w $a1, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a1, $a1, 3160 + add.d $a1, $fp, $a1 ori $a2, $s0, 3328 -.LBB5_263: # %vector.body5135 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 +.LBB5_263: # %vector.body5500 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 bnez $a2, .LBB5_263 -# %bb.264: # %vector.body5140.preheader - lu12i.w $a0, 125 - ori $a0, $a0, 132 +# %bb.264: # %vector.body5508.preheader + lu12i.w $a1, 125 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a1, 136 add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - ori $a2, $s0, 3328 -.LBB5_265: # %vector.body5140 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_265 + ori $a1, $s0, 3328 +.LBB5_265: # %vector.body5508 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_265 b .LBB5_573 -.LBB5_266: # %vector.body5094.preheader +.LBB5_266: # %vector.body5450.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_267: # %vector.body5094 +.LBB5_267: # %vector.body5450 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_267 -# %bb.268: # %vector.body5099.preheader +# %bb.268: # %vector.body5455.preheader lu12i.w $a2, 31 ori $a3, $a2, 1048 add.d $a4, $a0, $a3 lu12i.w $a3, 3 ori $a5, $a3, 3712 lu12i.w $a6, 260096 -.LBB5_269: # %vector.body5099 +.LBB5_269: # %vector.body5455 # =>This Inner Loop Header: Depth=1 st.w $a6, $a4, -8 st.w $a6, $a4, 0 addi.d $a5, $a5, -2 addi.d $a4, $a4, 16 bnez $a5, .LBB5_269 -# %bb.270: # %vector.body5104.preheader +# %bb.270: # %vector.body5460.preheader ori $a2, $a2, 1052 add.d $a2, $a0, $a2 ori $a3, $a3, 3712 lu12i.w $a4, -264192 lu32i.d $a4, 0 -.LBB5_271: # %vector.body5104 +.LBB5_271: # %vector.body5460 # =>This Inner Loop Header: Depth=1 st.w $a4, $a2, -8 st.w $a4, $a2, 0 addi.d $a3, $a3, -2 addi.d $a2, $a2, 16 bnez $a3, .LBB5_271 -# %bb.272: # %vector.body5110.preheader +# %bb.272: # %vector.body5466.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_273: # %vector.body5110 +.LBB5_273: # %vector.body5466 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_273 -# %bb.274: # %vector.body5115.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.274: # %vector.body5471.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_275: # %vector.body5115 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_275: # %vector.body5471 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_275 -# %bb.276: # %vector.body5120.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.276: # %vector.body5479.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_277: # %vector.body5120 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_277: # %vector.body5479 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_277 b .LBB5_573 -.LBB5_278: # %vector.body5079.preheader +.LBB5_278: # %vector.body5429.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_279: # %vector.body5079 +.LBB5_279: # %vector.body5429 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_279 -# %bb.280: # %vector.body5084.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.280: # %vector.body5434.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_281: # %vector.body5084 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_281: # %vector.body5434 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_281 -# %bb.282: # %vector.body5089.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.282: # %vector.body5442.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_283: # %vector.body5089 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_283: # %vector.body5442 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_283 b .LBB5_573 -.LBB5_284: # %vector.body5067.preheader +.LBB5_284: # %vector.body5417.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_285: # %vector.body5067 +.LBB5_285: # %vector.body5417 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_285 -# %bb.286: # %vector.body5072.preheader +# %bb.286: # %vector.body5422.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_287: # %vector.body5072 +.LBB5_287: # %vector.body5422 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3882,30 +3861,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_287 b .LBB5_573 -.LBB5_288: # %vector.body5055.preheader +.LBB5_288: # %vector.body5405.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_289: # %vector.body5055 +.LBB5_289: # %vector.body5405 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_289 -# %bb.290: # %vector.body5060.preheader +# %bb.290: # %vector.body5410.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_291: # %vector.body5060 +.LBB5_291: # %vector.body5410 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3937,30 +3916,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_291 b .LBB5_573 -.LBB5_292: # %vector.body5043.preheader +.LBB5_292: # %vector.body5393.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_293: # %vector.body5043 +.LBB5_293: # %vector.body5393 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_293 -# %bb.294: # %vector.body5048.preheader +# %bb.294: # %vector.body5398.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_295: # %vector.body5048 +.LBB5_295: # %vector.body5398 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -3992,30 +3971,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_295 b .LBB5_573 -.LBB5_296: # %vector.body5031.preheader +.LBB5_296: # %vector.body5381.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_297: # %vector.body5031 +.LBB5_297: # %vector.body5381 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_297 -# %bb.298: # %vector.body5036.preheader +# %bb.298: # %vector.body5386.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_299: # %vector.body5036 +.LBB5_299: # %vector.body5386 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4047,30 +4026,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_299 b .LBB5_573 -.LBB5_300: # %vector.body5019.preheader +.LBB5_300: # %vector.body5369.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_301: # %vector.body5019 +.LBB5_301: # %vector.body5369 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_301 -# %bb.302: # %vector.body5024.preheader +# %bb.302: # %vector.body5374.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_303: # %vector.body5024 +.LBB5_303: # %vector.body5374 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -4102,69 +4081,66 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_303 b .LBB5_573 -.LBB5_304: # %vector.body5004.preheader +.LBB5_304: # %vector.body5348.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_305: # %vector.body5004 +.LBB5_305: # %vector.body5348 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_305 -# %bb.306: # %vector.body5009.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.306: # %vector.body5353.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_307: # %vector.body5009 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_307: # %vector.body5353 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_307 -# %bb.308: # %vector.body5014.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.308: # %vector.body5361.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_309: # %vector.body5014 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_309: # %vector.body5361 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_309 b .LBB5_573 .LBB5_310: # %.preheader.i1469.preheader @@ -4181,254 +4157,241 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_311: # %vector.body4984 +.LBB5_311: # %vector.body5319 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_311 -# %bb.312: # %vector.body4989.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.312: # %vector.body5324.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_313: # %vector.body4989 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_313: # %vector.body5324 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_313 -# %bb.314: # %vector.body4994.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.314: # %vector.body5332.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_315: # %vector.body4994 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_315: # %vector.body5332 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_315 -# %bb.316: # %vector.body4999.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.316: # %vector.body5340.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_317: # %vector.body4999 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_317: # %vector.body5340 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_317 b .LBB5_573 -.LBB5_318: # %vector.body4964.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_319: # %vector.body4964 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_318: # %vector.body5293.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_319: # %vector.body5293 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_319 -# %bb.320: # %vector.body4969.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1072 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 +# %bb.320: # %vector.body5301.preheader + lu12i.w $a3, 31 + ori $a3, $a3, 1072 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 xvldi $xr0, -1424 -.LBB5_321: # %vector.body4969 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_321 -# %bb.322: # %vector.body4974.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2128 - add.d $a2, $a0, $a2 - ori $a3, $a1, 3328 -.LBB5_323: # %vector.body4974 - # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a2, -32 - xvst $xr0, $a2, 0 - addi.d $a3, $a3, -16 - addi.d $a2, $a2, 64 - bnez $a3, .LBB5_323 -# %bb.324: # %vector.body4979.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_325: # %vector.body4979 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_321: # %vector.body5301 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_321 +# %bb.322: # %vector.body5306.preheader + lu12i.w $a3, 62 + ori $a3, $a3, 2128 + add.d $a3, $a1, $a3 + ori $a4, $a2, 3328 +.LBB5_323: # %vector.body5306 + # =>This Inner Loop Header: Depth=1 + xvst $xr0, $a3, -32 + xvst $xr0, $a3, 0 + addi.d $a4, $a4, -16 + addi.d $a3, $a3, 64 + bnez $a4, .LBB5_323 +# %bb.324: # %vector.body5311.preheader + lu12i.w $a3, 93 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 3160 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_325: # %vector.body5311 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_325 b .LBB5_573 -.LBB5_326: # %vector.body4944.preheader +.LBB5_326: # %vector.body5264.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_327: # %vector.body4944 +.LBB5_327: # %vector.body5264 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_327 -# %bb.328: # %vector.body4949.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.328: # %vector.body5269.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_329: # %vector.body4949 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_329: # %vector.body5269 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_329 -# %bb.330: # %vector.body4954.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.330: # %vector.body5277.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_331: # %vector.body4954 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_331: # %vector.body5277 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_331 -# %bb.332: # %vector.body4959.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.332: # %vector.body5285.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_333: # %vector.body4959 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_333: # %vector.body5285 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_333 b .LBB5_573 .LBB5_334: # %.preheader.i1529.preheader @@ -4445,19 +4408,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_335: # %vector.body4934 +.LBB5_335: # %vector.body5254 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_335 -# %bb.336: # %vector.body4939.preheader +# %bb.336: # %vector.body5259.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_337: # %vector.body4939 +.LBB5_337: # %vector.body5259 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -4915,69 +4878,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_358 b .LBB5_573 -.LBB5_359: # %vector.body4850.preheader +.LBB5_359: # %vector.body5164.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_360: # %vector.body4850 +.LBB5_360: # %vector.body5164 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_360 -# %bb.361: # %vector.body4855.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.361: # %vector.body5169.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_362: # %vector.body4855 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a5 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_362: # %vector.body5169 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a3, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_362 -# %bb.363: # %vector.body4860.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.363: # %vector.body5177.preheader + lu12i.w $a3, 62 + vld $vr1, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 ori $a1, $a1, 3328 -.LBB5_364: # %vector.body4860 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a3, 31, 0 - addi.d $a5, $a3, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - frecip.d $fa1, $fa1 - frecip.d $fa2, $fa2 - fcvt.s.d $fa1, $fa1 - fcvt.s.d $fa2, $fa2 - fst.s $fa1, $a2, -4 - fst.s $fa2, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 +.LBB5_364: # %vector.body5177 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr2, $vr1, 8 + vaddi.wu $vr3, $vr2, 1 + vaddi.wu $vr2, $vr2, 3 + vext2xv.du.wu $xr3, $xr3 + xvffint.d.lu $xr3, $xr3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vfrecip.d $vr3, $vr3 + vfrecip.d $vr2, $vr2 + vfcvt.s.d $vr2, $vr2, $vr3 + vst $vr2, $a2, -8 + vaddi.du $vr1, $vr1, 4 + addi.d $a1, $a1, -4 + addi.d $a2, $a2, 16 bnez $a1, .LBB5_364 # %bb.365: # %.preheader.i1631.preheader lu12i.w $a1, 156 @@ -5071,50 +5031,50 @@ init: # @init addi.d $a0, $a0, 1024 bne $a1, $a2, .LBB5_368 b .LBB5_573 -.LBB5_369: # %vector.body4830.preheader +.LBB5_369: # %vector.body5144.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_370: # %vector.body4830 +.LBB5_370: # %vector.body5144 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_370 -# %bb.371: # %vector.body4835.preheader +# %bb.371: # %vector.body5149.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_372: # %vector.body4835 +.LBB5_372: # %vector.body5149 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_372 -# %bb.373: # %vector.body4840.preheader +# %bb.373: # %vector.body5154.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_374: # %vector.body4840 +.LBB5_374: # %vector.body5154 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_374 -# %bb.375: # %vector.body4845.preheader +# %bb.375: # %vector.body5159.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_376: # %vector.body4845 +.LBB5_376: # %vector.body5159 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5122,7 +5082,7 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_376 b .LBB5_573 -.LBB5_377: # %vector.body4810.preheader +.LBB5_377: # %vector.body5124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 @@ -5131,43 +5091,43 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_378: # %vector.body4810 +.LBB5_378: # %vector.body5124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_378 -# %bb.379: # %vector.body4815.preheader +# %bb.379: # %vector.body5129.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_380: # %vector.body4815 +.LBB5_380: # %vector.body5129 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_380 -# %bb.381: # %vector.body4820.preheader +# %bb.381: # %vector.body5134.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_382: # %vector.body4820 +.LBB5_382: # %vector.body5134 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_382 -# %bb.383: # %vector.body4825.preheader +# %bb.383: # %vector.body5139.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_384: # %vector.body4825 +.LBB5_384: # %vector.body5139 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5189,87 +5149,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_386: # %vector.body4790 +.LBB5_386: # %vector.body5095 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_386 -# %bb.387: # %vector.body4795.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.387: # %vector.body5100.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_388: # %vector.body4795 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_388: # %vector.body5100 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_388 -# %bb.389: # %vector.body4800.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.389: # %vector.body5108.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_390: # %vector.body4800 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_390: # %vector.body5108 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_390 -# %bb.391: # %vector.body4805.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.391: # %vector.body5116.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_392: # %vector.body4805 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_392: # %vector.body5116 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_392 b .LBB5_573 .LBB5_393: # %.preheader.i1701.preheader @@ -5286,14 +5241,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_394: # %vector.body4775 +.LBB5_394: # %vector.body5080 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_394 -# %bb.395: # %vector.body4780.preheader +# %bb.395: # %vector.body5085.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -5301,19 +5256,19 @@ init: # @init lu12i.w $a3, 219235 ori $a3, $a3, 1981 xvreplgr2vr.w $xr0, $a3 -.LBB5_396: # %vector.body4780 +.LBB5_396: # %vector.body5085 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_396 -# %bb.397: # %vector.body4785.preheader +# %bb.397: # %vector.body5090.preheader lu12i.w $a1, 93 ori $a1, $a1, 3184 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_398: # %vector.body4785 +.LBB5_398: # %vector.body5090 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5335,87 +5290,82 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_400: # %vector.body4755 +.LBB5_400: # %vector.body5051 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_400 -# %bb.401: # %vector.body4760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.401: # %vector.body5056.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_402: # %vector.body4760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_402: # %vector.body5056 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_402 -# %bb.403: # %vector.body4765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.403: # %vector.body5064.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_404: # %vector.body4765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_404: # %vector.body5064 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_404 -# %bb.405: # %vector.body4770.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.405: # %vector.body5072.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_406: # %vector.body4770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_406: # %vector.body5072 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_406 b .LBB5_573 .LBB5_407: # %.preheader.i1740.preheader @@ -5432,19 +5382,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_408: # %vector.body4745 +.LBB5_408: # %vector.body5041 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_408 -# %bb.409: # %vector.body4750.preheader +# %bb.409: # %vector.body5046.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_410: # %vector.body4750 +.LBB5_410: # %vector.body5046 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5452,21 +5402,21 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_410 b .LBB5_573 -.LBB5_411: # %vector.body4725.preheader +.LBB5_411: # %vector.body5018.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_412: # %vector.body4725 +.LBB5_412: # %vector.body5018 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_412 -# %bb.413: # %vector.body4730.preheader +# %bb.413: # %vector.body5023.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 @@ -5474,49 +5424,48 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr1, $a4 -.LBB5_414: # %vector.body4730 +.LBB5_414: # %vector.body5023 # =>This Inner Loop Header: Depth=1 xvst $xr1, $a2, -32 xvst $xr1, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_414 -# %bb.415: # %vector.body4735.preheader +# %bb.415: # %vector.body5028.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_416: # %vector.body4735 +.LBB5_416: # %vector.body5028 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_416 -# %bb.417: # %vector.body4740.preheader +# %bb.417: # %vector.body5033.preheader lu12i.w $a2, 93 - ori $a2, $a2, 3156 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_418: # %vector.body4740 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_418: # %vector.body5033 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_418 b .LBB5_573 .LBB5_419: # %.preheader.i1769.preheader @@ -5533,7 +5482,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_420: # %vector.body4720 +.LBB5_420: # %vector.body5013 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5555,7 +5504,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_422: # %vector.body4715 +.LBB5_422: # %vector.body5008 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -5563,14 +5512,14 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_422 b .LBB5_573 -.LBB5_423: # %vector.body4700.preheader +.LBB5_423: # %vector.body4993.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_424: # %vector.body4700 +.LBB5_424: # %vector.body4993 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5663,14 +5612,14 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_428 b .LBB5_573 -.LBB5_429: # %vector.body4685.preheader +.LBB5_429: # %vector.body4978.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_430: # %vector.body4685 +.LBB5_430: # %vector.body4978 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -5763,32 +5712,31 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_434 b .LBB5_573 -.LBB5_435: # %vector.body4663.preheader +.LBB5_435: # %vector.body4947.preheader + pcalau12i $s1, %pc_hi20(.LCPI5_0) + vld $vr0, $s1, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) - addi.d $a0, $s0, 4 - ori $a1, $zero, 2 - lu12i.w $s1, 7 - ori $a2, $s1, 3328 -.LBB5_436: # %vector.body4663 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_436 + addi.d $a0, $s0, 8 + lu12i.w $s2, 7 + ori $a1, $s2, 3328 +.LBB5_436: # %vector.body4947 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_436 # %bb.437: # %.preheader.i1826.preheader lu12i.w $a1, 31 ori $a0, $a1, 1040 @@ -5799,54 +5747,50 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 2104 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_438: # %vector.body4668 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_438 -# %bb.439: # %vector.body4673.preheader + ori $a1, $s2, 3328 +.LBB5_438: # %vector.body4955 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_438 +# %bb.439: # %vector.body4963.preheader lu12i.w $a0, 93 - ori $a0, $a0, 3156 + vld $vr0, $s1, %pc_lo12(.LCPI5_0) + ori $a0, $a0, 3160 add.d $a0, $s0, $a0 - ori $a1, $zero, 2 - ori $a2, $s1, 3328 -.LBB5_440: # %vector.body4673 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_440 + ori $a1, $s2, 3328 +.LBB5_440: # %vector.body4963 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_440 # %bb.441: # %.preheader.i1840.preheader lu12i.w $a0, 125 ori $a0, $a0, 128 @@ -5905,30 +5849,30 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_442 b .LBB5_573 -.LBB5_443: # %vector.body4639.preheader +.LBB5_443: # %vector.body4923.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_444: # %vector.body4639 +.LBB5_444: # %vector.body4923 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_444 -# %bb.445: # %vector.body4644.preheader +# %bb.445: # %vector.body4928.preheader lu12i.w $a4, 31 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a4, $a4, 1040 add.d $a4, $a0, $a4 ori $a5, $a1, 3328 -.LBB5_446: # %vector.body4644 +.LBB5_446: # %vector.body4928 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5959,14 +5903,14 @@ init: # @init addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 bnez $a5, .LBB5_446 -# %bb.447: # %vector.body4651.preheader +# %bb.447: # %vector.body4935.preheader lu12i.w $a4, 62 - xvld $xr1, $a2, %pc_lo12(.LCPI5_0) - xvld $xr2, $a3, %pc_lo12(.LCPI5_1) + xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr2, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a4, 2096 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_448: # %vector.body4651 +.LBB5_448: # %vector.body4935 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr3, $xr2, 1 xvaddi.du $xr4, $xr1, 1 @@ -5997,12 +5941,12 @@ init: # @init addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 bnez $a3, .LBB5_448 -# %bb.449: # %vector.body4658.preheader +# %bb.449: # %vector.body4942.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_450: # %vector.body4658 +.LBB5_450: # %vector.body4942 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6010,154 +5954,148 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_450 b .LBB5_573 -.LBB5_451: # %vector.body4624.preheader +.LBB5_451: # %vector.body4902.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_452: # %vector.body4624 +.LBB5_452: # %vector.body4902 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_452 -# %bb.453: # %vector.body4629.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.453: # %vector.body4907.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_454: # %vector.body4629 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_454: # %vector.body4907 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_454 -# %bb.455: # %vector.body4634.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.455: # %vector.body4915.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_456: # %vector.body4634 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_456: # %vector.body4915 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_456 b .LBB5_573 -.LBB5_457: # %vector.body4599.preheader +.LBB5_457: # %vector.body4871.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_458: # %vector.body4599 +.LBB5_458: # %vector.body4871 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_458 -# %bb.459: # %vector.body4604.preheader +# %bb.459: # %vector.body4876.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_460: # %vector.body4604 +.LBB5_460: # %vector.body4876 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_460 -# %bb.461: # %vector.body4609.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.461: # %vector.body4881.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_462: # %vector.body4609 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_462: # %vector.body4881 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_462 -# %bb.463: # %vector.body4614.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.463: # %vector.body4889.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_464: # %vector.body4614 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_464 -# %bb.465: # %vector.body4619.preheader + ori $a3, $a1, 3328 +.LBB5_464: # %vector.body4889 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 + bnez $a3, .LBB5_464 +# %bb.465: # %vector.body4897.preheader lu12i.w $a2, 125 ori $a2, $a2, 160 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3264 -.LBB5_466: # %vector.body4619 +.LBB5_466: # %vector.body4897 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -6165,45 +6103,45 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_466 b .LBB5_573 -.LBB5_467: # %vector.body4574.preheader +.LBB5_467: # %vector.body4843.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_468: # %vector.body4574 +.LBB5_468: # %vector.body4843 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_468 -# %bb.469: # %vector.body4579.preheader +# %bb.469: # %vector.body4848.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_470: # %vector.body4579 +.LBB5_470: # %vector.body4848 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_470 -# %bb.471: # %vector.body4584.preheader +# %bb.471: # %vector.body4853.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_472: # %vector.body4584 +.LBB5_472: # %vector.body4853 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_472 -# %bb.473: # %vector.body4589.preheader +# %bb.473: # %vector.body4858.preheader lu12i.w $a2, 93 ori $a2, $a2, 3184 add.d $a2, $a0, $a2 @@ -6211,37 +6149,36 @@ init: # @init lu12i.w $a4, 219235 ori $a4, $a4, 1981 xvreplgr2vr.w $xr0, $a4 -.LBB5_474: # %vector.body4589 +.LBB5_474: # %vector.body4858 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_474 -# %bb.475: # %vector.body4594.preheader +# %bb.475: # %vector.body4863.preheader lu12i.w $a2, 125 - ori $a2, $a2, 132 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_476: # %vector.body4594 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_476: # %vector.body4863 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_476 b .LBB5_573 .LBB5_477: # %.preheader.i1927.preheader @@ -6258,74 +6195,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_478: # %vector.body4554 +.LBB5_478: # %vector.body4817 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_478 -# %bb.479: # %vector.body4559.preheader +# %bb.479: # %vector.body4822.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_480: # %vector.body4559 +.LBB5_480: # %vector.body4822 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_480 -# %bb.481: # %vector.body4564.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.481: # %vector.body4827.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_482: # %vector.body4564 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_482: # %vector.body4827 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_482 -# %bb.483: # %vector.body4569.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.483: # %vector.body4835.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_484: # %vector.body4569 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_484: # %vector.body4835 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_484 b .LBB5_573 .LBB5_485: # %.preheader.i1949.preheader @@ -6461,669 +6395,638 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_490 b .LBB5_573 -.LBB5_491: # %vector.body4519.preheader +.LBB5_491: # %vector.body4773.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_492: # %vector.body4519 +.LBB5_492: # %vector.body4773 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_492 -# %bb.493: # %vector.body4524.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.493: # %vector.body4778.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_494: # %vector.body4524 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_494: # %vector.body4778 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_494 -# %bb.495: # %vector.body4529.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.495: # %vector.body4786.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_496: # %vector.body4529 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_496: # %vector.body4786 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_496 -# %bb.497: # %vector.body4534.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.497: # %vector.body4794.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_498: # %vector.body4534 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_498: # %vector.body4794 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_498 b .LBB5_573 -.LBB5_499: # %vector.body4489.preheader +.LBB5_499: # %vector.body4734.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_500: # %vector.body4489 +.LBB5_500: # %vector.body4734 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_500 -# %bb.501: # %vector.body4494.preheader +# %bb.501: # %vector.body4739.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_502: # %vector.body4494 +.LBB5_502: # %vector.body4739 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_502 -# %bb.503: # %vector.body4499.preheader +# %bb.503: # %vector.body4744.preheader lu12i.w $a2, 46 ori $a2, $a2, 3632 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_504: # %vector.body4499 +.LBB5_504: # %vector.body4744 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_504 -# %bb.505: # %vector.body4504.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.505: # %vector.body4749.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_506: # %vector.body4504 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_506: # %vector.body4749 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_506 -# %bb.507: # %vector.body4509.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.507: # %vector.body4757.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_508: # %vector.body4509 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_508: # %vector.body4757 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_508 -# %bb.509: # %vector.body4514.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.509: # %vector.body4765.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_510: # %vector.body4514 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_510: # %vector.body4765 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_510 b .LBB5_573 -.LBB5_511: # %vector.body4459.preheader +.LBB5_511: # %vector.body4695.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_512: # %vector.body4459 +.LBB5_512: # %vector.body4695 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_512 -# %bb.513: # %vector.body4464.preheader +# %bb.513: # %vector.body4700.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_514: # %vector.body4464 +.LBB5_514: # %vector.body4700 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_514 -# %bb.515: # %vector.body4469.preheader +# %bb.515: # %vector.body4705.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_516: # %vector.body4469 +.LBB5_516: # %vector.body4705 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_516 -# %bb.517: # %vector.body4474.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.517: # %vector.body4710.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_518: # %vector.body4474 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_518: # %vector.body4710 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_518 -# %bb.519: # %vector.body4479.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.519: # %vector.body4718.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_520: # %vector.body4479 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_520: # %vector.body4718 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_520 -# %bb.521: # %vector.body4484.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.521: # %vector.body4726.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_522: # %vector.body4484 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_522: # %vector.body4726 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_522 b .LBB5_573 -.LBB5_523: # %vector.body4429.preheader +.LBB5_523: # %vector.body4656.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1296 -.LBB5_524: # %vector.body4429 +.LBB5_524: # %vector.body4656 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_524 -# %bb.525: # %vector.body4434.preheader +# %bb.525: # %vector.body4661.preheader addu16i.d $a2, $a0, 1 addi.d $a2, $a2, -1504 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_526: # %vector.body4434 +.LBB5_526: # %vector.body4661 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_526 -# %bb.527: # %vector.body4439.preheader +# %bb.527: # %vector.body4666.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_528: # %vector.body4439 +.LBB5_528: # %vector.body4666 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_528 -# %bb.529: # %vector.body4444.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.529: # %vector.body4671.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_530: # %vector.body4444 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_530: # %vector.body4671 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_530 -# %bb.531: # %vector.body4449.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.531: # %vector.body4679.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_532: # %vector.body4449 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_532: # %vector.body4679 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_532 -# %bb.533: # %vector.body4454.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.533: # %vector.body4687.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_534: # %vector.body4454 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_534: # %vector.body4687 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_534 b .LBB5_573 -.LBB5_535: # %vector.body4404.preheader +.LBB5_535: # %vector.body4622.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_536: # %vector.body4404 +.LBB5_536: # %vector.body4622 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_536 -# %bb.537: # %vector.body4409.preheader +# %bb.537: # %vector.body4627.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_538: # %vector.body4409 +.LBB5_538: # %vector.body4627 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_538 -# %bb.539: # %vector.body4414.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.539: # %vector.body4632.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_540: # %vector.body4414 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_540: # %vector.body4632 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_540 -# %bb.541: # %vector.body4419.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.541: # %vector.body4640.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_542: # %vector.body4419 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_542: # %vector.body4640 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_542 -# %bb.543: # %vector.body4424.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.543: # %vector.body4648.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_544: # %vector.body4424 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_544: # %vector.body4648 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_544 b .LBB5_573 -.LBB5_545: # %vector.body4389.preheader +.LBB5_545: # %vector.body4601.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_546: # %vector.body4389 +.LBB5_546: # %vector.body4601 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_546 -# %bb.547: # %vector.body4394.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.547: # %vector.body4606.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_548: # %vector.body4394 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_548: # %vector.body4606 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_548 -# %bb.549: # %vector.body4399.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.549: # %vector.body4614.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_550: # %vector.body4399 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_550: # %vector.body4614 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_550 b .LBB5_573 -.LBB5_551: # %vector.body4374.preheader +.LBB5_551: # %vector.body4580.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_552: # %vector.body4374 +.LBB5_552: # %vector.body4580 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_552 -# %bb.553: # %vector.body4379.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.553: # %vector.body4585.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_554: # %vector.body4379 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_554: # %vector.body4585 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_554 -# %bb.555: # %vector.body4384.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.555: # %vector.body4593.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_556: # %vector.body4384 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_556: # %vector.body4593 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_556 b .LBB5_573 .LBB5_557: # %.preheader.i2121.preheader @@ -7140,19 +7043,19 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_558: # %vector.body4364 +.LBB5_558: # %vector.body4570 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_558 -# %bb.559: # %vector.body4369.preheader +# %bb.559: # %vector.body4575.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a0, $a0, 3328 -.LBB5_560: # %vector.body4369 +.LBB5_560: # %vector.body4575 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7174,7 +7077,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_562: # %vector.body4359 +.LBB5_562: # %vector.body4565 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7196,7 +7099,7 @@ init: # @init lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_564: # %vector.body4354 +.LBB5_564: # %vector.body4560 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7204,31 +7107,30 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_564 b .LBB5_573 -.LBB5_565: # %vector.body4349.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_566: # %vector.body4349 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_566 +.LBB5_565: # %vector.body4552.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_566: # %vector.body4552 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_566 b .LBB5_573 .LBB5_567: # %.preheader.i2154.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7377,43 +7279,43 @@ init: # @init bnez $a2, .LBB5_572 .LBB5_573: # %set1d.exit1086 move $a0, $zero - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $ra, $sp, 72 # 8-byte Folded Reload - addi.d $sp, $sp, 80 + ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $ra, $sp, 88 # 8-byte Folded Reload + addi.d $sp, $sp, 96 ret .LBB5_574: # %.preheader.i2177.preheader pcalau12i $a0, %pc_hi20(global_data+640192) addi.d $a0, $a0, %pc_lo12(global_data+640192) b .LBB5_345 -.LBB5_575: # %vector.body4325.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_576: # %vector.body4325 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_576 +.LBB5_575: # %vector.body4525.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_576: # %vector.body4525 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_576 b .LBB5_573 -.LBB5_577: # %vector.body4320.preheader +.LBB5_577: # %vector.body4520.preheader pcalau12i $a0, %pc_hi20(global_data+32) addi.d $a0, $a0, %pc_lo12(global_data+32) lu12i.w $a1, 7 @@ -7421,7 +7323,7 @@ init: # @init lu12i.w $a2, 260096 ori $a2, $a2, 8 xvreplgr2vr.w $xr0, $a2 -.LBB5_578: # %vector.body4320 +.LBB5_578: # %vector.body4520 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7429,162 +7331,155 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_578 b .LBB5_573 -.LBB5_579: # %vector.body4310.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_580: # %vector.body4310 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_579: # %vector.body4504.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_580: # %vector.body4504 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_580 -# %bb.581: # %vector.body4315.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_582: # %vector.body4315 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.581: # %vector.body4512.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_582: # %vector.body4512 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_582 b .LBB5_573 -.LBB5_583: # %vector.body4305.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_584: # %vector.body4305 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_584 +.LBB5_583: # %vector.body4496.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_584: # %vector.body4496 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_584 b .LBB5_573 -.LBB5_585: # %vector.body4300.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_586: # %vector.body4300 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_586 +.LBB5_585: # %vector.body4488.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_586: # %vector.body4488 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_586 b .LBB5_573 -.LBB5_587: # %vector.body4295.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_588: # %vector.body4295 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_588 +.LBB5_587: # %vector.body4480.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_588: # %vector.body4480 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_588 b .LBB5_573 -.LBB5_589: # %vector.body4290.preheader +.LBB5_589: # %vector.body4472.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_590: # %vector.body4290 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_590 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_590: # %vector.body4472 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_590 b .LBB5_607 .LBB5_591: # %.preheader.i2230.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -7603,79 +7498,74 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 62 - ori $a0, $a0, 2100 - add.d $a1, $fp, $a0 - ori $a2, $zero, 2 + ori $a0, $a0, 2104 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + add.d $a2, $fp, $a0 lu12i.w $a0, 7 ori $a3, $a0, 3328 -.LBB5_592: # %vector.body4275 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_592: # %vector.body4448 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_592 -# %bb.593: # %vector.body4280.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.593: # %vector.body4456.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_594: # %vector.body4280 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_594: # %vector.body4456 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_594 -# %bb.595: # %vector.body4285.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.595: # %vector.body4464.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_596: # %vector.body4285 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_596: # %vector.body4464 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_596 b .LBB5_573 .LBB5_597: # %.preheader34.i2253.preheader @@ -7734,42 +7624,41 @@ init: # @init lu12i.w $a1, 220 ori $a1, $a1, 1212 b .LBB5_630 -.LBB5_600: # %vector.body4263.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_601: # %vector.body4263 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_601 - b .LBB5_573 -.LBB5_602: # %vector.body4256.preheader +.LBB5_600: # %vector.body4433.preheader pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) + lu12i.w $a1, 7 + ori $a1, $a1, 3328 +.LBB5_601: # %vector.body4433 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_601 + b .LBB5_573 +.LBB5_602: # %vector.body4426.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_603: # %vector.body4256 +.LBB5_603: # %vector.body4426 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -7804,45 +7693,44 @@ init: # @init pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) b .LBB5_610 -.LBB5_605: # %vector.body4251.preheader +.LBB5_605: # %vector.body4418.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_606: # %vector.body4251 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_606 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_606: # %vector.body4418 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_606 .LBB5_607: # %set1d.exit2229 lu12i.w $a1, 31 ori $a1, $a1, 1020 lu12i.w $a2, -262144 b .LBB5_687 -.LBB5_608: # %vector.body4246.preheader +.LBB5_608: # %vector.body4413.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a2, $a2, 3328 xvldi $xr0, -1424 -.LBB5_609: # %vector.body4246 +.LBB5_609: # %vector.body4413 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -7855,14 +7743,14 @@ init: # @init add.d $a0, $a0, $a2 ori $a2, $a1, 1024 b .LBB5_346 -.LBB5_611: # %vector.body4241.preheader +.LBB5_611: # %vector.body4408.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $s0, $a0, %pc_lo12(global_data) addi.d $a0, $s0, 32 lu12i.w $a1, 7 ori $a1, $a1, 3328 xvldi $xr0, -1424 -.LBB5_612: # %vector.body4241 +.LBB5_612: # %vector.body4408 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -7886,147 +7774,141 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 b .LBB5_573 -.LBB5_614: # %vector.body4216.preheader +.LBB5_614: # %vector.body4374.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_615: # %vector.body4216 +.LBB5_615: # %vector.body4374 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_615 -# %bb.616: # %vector.body4221.preheader +# %bb.616: # %vector.body4379.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_617: # %vector.body4221 +.LBB5_617: # %vector.body4379 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_617 -# %bb.618: # %vector.body4226.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.618: # %vector.body4384.preheader + lu12i.w $a3, 62 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_619: # %vector.body4226 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_619: # %vector.body4384 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_619 -# %bb.620: # %vector.body4231.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.620: # %vector.body4392.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_621: # %vector.body4231 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_621: # %vector.body4392 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_621 -# %bb.622: # %vector.body4236.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.622: # %vector.body4400.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_623: # %vector.body4236 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_623: # %vector.body4400 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_623 b .LBB5_573 -.LBB5_624: # %vector.body4211.preheader +.LBB5_624: # %vector.body4366.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_625: # %vector.body4211 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_625 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_625: # %vector.body4366 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_625 # %bb.626: # %set1d.exit2327 lu12i.w $a1, 31 b .LBB5_686 -.LBB5_627: # %vector.body4204.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_627: # %vector.body4359.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_628: # %vector.body4204 +.LBB5_628: # %vector.body4359 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8075,81 +7957,77 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $a0, $s0, 1044 + ori $a0, $s0, 1048 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) add.d $a0, $fp, $a0 - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_632: # %vector.body4199 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_632 - b .LBB5_573 -.LBB5_633: # %vector.body4189.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_634: # %vector.body4189 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_634 -# %bb.635: # %vector.body4194.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_636: # %vector.body4194 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_632: # %vector.body4351 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_632 + b .LBB5_573 +.LBB5_633: # %vector.body4335.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_634: # %vector.body4335 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_634 +# %bb.635: # %vector.body4343.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_636: # %vector.body4343 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_636 b .LBB5_573 .LBB5_637: # %.preheader34.i2352.preheader @@ -8248,26 +8126,26 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a1, .LBB5_640 b .LBB5_573 -.LBB5_641: # %vector.body4167.preheader +.LBB5_641: # %vector.body4313.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_642: # %vector.body4167 +.LBB5_642: # %vector.body4313 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_642 -# %bb.643: # %vector.body4172.preheader +# %bb.643: # %vector.body4318.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_644: # %vector.body4172 +.LBB5_644: # %vector.body4318 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8275,78 +8153,75 @@ init: # @init addi.d $a2, $a2, 64 bnez $a1, .LBB5_644 b .LBB5_653 -.LBB5_645: # %vector.body4157.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_646: # %vector.body4157 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_645: # %vector.body4297.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_646: # %vector.body4297 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_646 -# %bb.647: # %vector.body4162.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_648: # %vector.body4162 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.647: # %vector.body4305.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_648: # %vector.body4305 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_648 b .LBB5_573 -.LBB5_649: # %vector.body4147.preheader +.LBB5_649: # %vector.body4287.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_650: # %vector.body4147 +.LBB5_650: # %vector.body4287 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_650 -# %bb.651: # %vector.body4152.preheader +# %bb.651: # %vector.body4292.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_652: # %vector.body4152 +.LBB5_652: # %vector.body4292 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 @@ -8359,134 +8234,128 @@ init: # @init lu12i.w $a2, 260096 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_654: # %vector.body4132.preheader +.LBB5_654: # %vector.body4266.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_655: # %vector.body4132 +.LBB5_655: # %vector.body4266 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_655 -# %bb.656: # %vector.body4137.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.656: # %vector.body4271.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_657: # %vector.body4137 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_657: # %vector.body4271 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_657 -# %bb.658: # %vector.body4142.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.658: # %vector.body4279.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_659: # %vector.body4142 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_659: # %vector.body4279 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_659 b .LBB5_573 -.LBB5_660: # %vector.body4117.preheader +.LBB5_660: # %vector.body4245.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_661: # %vector.body4117 +.LBB5_661: # %vector.body4245 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_661 -# %bb.662: # %vector.body4122.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_663: # %vector.body4122 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +# %bb.662: # %vector.body4250.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a1, 3328 +.LBB5_663: # %vector.body4250 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_663 -# %bb.664: # %vector.body4127.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.664: # %vector.body4258.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_665: # %vector.body4127 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_665: # %vector.body4258 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_665 b .LBB5_573 .LBB5_666: # %.preheader.i2421.preheader @@ -8503,74 +8372,71 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_667: # %vector.body4097 +.LBB5_667: # %vector.body4219 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_667 -# %bb.668: # %vector.body4102.preheader +# %bb.668: # %vector.body4224.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 ori $a2, $a0, 3328 -.LBB5_669: # %vector.body4102 +.LBB5_669: # %vector.body4224 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_669 -# %bb.670: # %vector.body4107.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.670: # %vector.body4229.preheader + lu12i.w $a2, 93 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 3160 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_671: # %vector.body4107 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_671: # %vector.body4229 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_671 -# %bb.672: # %vector.body4112.preheader - lu12i.w $a1, 125 - ori $a1, $a1, 132 +# %bb.672: # %vector.body4237.preheader + lu12i.w $a2, 125 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 136 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_673: # %vector.body4112 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_673: # %vector.body4237 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_673 b .LBB5_573 .LBB5_674: # %.preheader.i2443.preheader @@ -8719,69 +8585,66 @@ init: # @init addi.d $a0, $a0, 1024 bnez $a2, .LBB5_679 b .LBB5_573 -.LBB5_680: # %vector.body4063.preheader +.LBB5_680: # %vector.body4179.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a1, $a0, 32 lu12i.w $a2, 7 ori $a3, $a2, 3328 xvldi $xr0, -1424 -.LBB5_681: # %vector.body4063 +.LBB5_681: # %vector.body4179 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a3, $a3, -16 addi.d $a1, $a1, 64 bnez $a3, .LBB5_681 -# %bb.682: # %vector.body4068.preheader +# %bb.682: # %vector.body4184.preheader lu12i.w $a1, 31 - ori $a3, $a1, 1044 - add.d $a3, $a0, $a3 - ori $a4, $zero, 2 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a4, $a1, 1048 + add.d $a4, $a0, $a4 ori $a5, $a2, 3328 -.LBB5_683: # %vector.body4068 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a6, $a4, 31, 0 - addi.d $a7, $a4, -1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a5, $a5, -2 - addi.w $a4, $a4, 2 +.LBB5_683: # %vector.body4184 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a4, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 16 bnez $a5, .LBB5_683 -# %bb.684: # %vector.body4073.preheader - lu12i.w $a3, 62 - ori $a3, $a3, 2100 +# %bb.684: # %vector.body4192.preheader + lu12i.w $a4, 62 + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a3, $a4, 2104 add.d $a3, $a0, $a3 - ori $a4, $zero, 2 ori $a2, $a2, 3328 -.LBB5_685: # %vector.body4073 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a4, 31, 0 - addi.d $a6, $a4, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a3, -4 - fst.s $fa1, $a3, 0 - addi.d $a3, $a3, 8 - addi.d $a2, $a2, -2 - addi.w $a4, $a4, 2 +.LBB5_685: # %vector.body4192 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a3, $a3, 16 bnez $a2, .LBB5_685 .LBB5_686: # %set1d.exit2479 ori $a1, $a1, 1020 @@ -8790,16 +8653,16 @@ init: # @init lu32i.d $a2, 0 stx.w $a2, $a0, $a1 b .LBB5_573 -.LBB5_688: # %vector.body4051.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_688: # %vector.body4167.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_689: # %vector.body4051 +.LBB5_689: # %vector.body4167 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8830,12 +8693,12 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_689 -# %bb.690: # %vector.body4058.preheader +# %bb.690: # %vector.body4174.preheader pcalau12i $a1, %pc_hi20(global_data+128048) addi.d $a1, $a1, %pc_lo12(global_data+128048) ori $a0, $a0, 3328 xvldi $xr0, -1424 -.LBB5_691: # %vector.body4058 +.LBB5_691: # %vector.body4174 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -8843,28 +8706,28 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_691 b .LBB5_573 -.LBB5_692: # %vector.body4039.preheader +.LBB5_692: # %vector.body4155.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_693: # %vector.body4039 +.LBB5_693: # %vector.body4155 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_693 -# %bb.694: # %vector.body4044.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.694: # %vector.body4160.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_695: # %vector.body4044 +.LBB5_695: # %vector.body4160 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8904,15 +8767,15 @@ init: # @init move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_697: # %vector.body4032 +.LBB5_697: # %vector.body4148 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8944,28 +8807,28 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_697 b .LBB5_573 -.LBB5_698: # %vector.body4020.preheader +.LBB5_698: # %vector.body4136.preheader pcalau12i $a0, %pc_hi20(array+32) addi.d $a0, $a0, %pc_lo12(array+32) lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_699: # %vector.body4020 +.LBB5_699: # %vector.body4136 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_699 -# %bb.700: # %vector.body4025.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +# %bb.700: # %vector.body4141.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) ori $a1, $a1, 3328 -.LBB5_701: # %vector.body4025 +.LBB5_701: # %vector.body4141 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -8997,30 +8860,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_701 b .LBB5_573 -.LBB5_702: # %vector.body4008.preheader +.LBB5_702: # %vector.body4124.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_703: # %vector.body4008 +.LBB5_703: # %vector.body4124 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_703 -# %bb.704: # %vector.body4013.preheader +# %bb.704: # %vector.body4129.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_705: # %vector.body4013 +.LBB5_705: # %vector.body4129 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9052,30 +8915,30 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_705 b .LBB5_573 -.LBB5_706: # %vector.body3996.preheader +.LBB5_706: # %vector.body4112.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_707: # %vector.body3996 +.LBB5_707: # %vector.body4112 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_707 -# %bb.708: # %vector.body4001.preheader +# %bb.708: # %vector.body4117.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_709: # %vector.body4001 +.LBB5_709: # %vector.body4117 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9107,7 +8970,7 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_709 b .LBB5_573 -.LBB5_710: # %vector.body3971.preheader +.LBB5_710: # %vector.body4081.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) addi.d $a0, $fp, 32 @@ -9115,7 +8978,7 @@ init: # @init ori $a1, $s0, 3328 xvldi $xr0, -1424 xvst $xr0, $sp, 16 # 32-byte Folded Spill -.LBB5_711: # %vector.body3971 +.LBB5_711: # %vector.body4081 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9123,63 +8986,60 @@ init: # @init addi.d $a1, $a1, -16 addi.d $a0, $a0, 64 bnez $a1, .LBB5_711 -# %bb.712: # %vector.body3976.preheader +# %bb.712: # %vector.body4086.preheader lu12i.w $a1, 31 - ori $a0, $a1, 1044 - add.d $a0, $fp, $a0 - ori $a2, $zero, 2 + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a2, $a1, 1048 + add.d $a2, $fp, $a2 ori $a3, $s0, 3328 -.LBB5_713: # %vector.body3976 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_713: # %vector.body4086 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_713 -# %bb.714: # %vector.body3981.preheader - lu12i.w $a0, 62 - ori $a0, $a0, 2100 +# %bb.714: # %vector.body4094.preheader + lu12i.w $a2, 62 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a2, 2104 add.d $a0, $fp, $a0 - ori $a2, $zero, 2 - ori $a3, $s0, 3328 -.LBB5_715: # %vector.body3981 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_715 -# %bb.716: # %vector.body3986.preheader + ori $a2, $s0, 3328 +.LBB5_715: # %vector.body4094 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a0, $a0, 16 + bnez $a2, .LBB5_715 +# %bb.716: # %vector.body4102.preheader lu12i.w $a0, 93 ori $a0, $a0, 3184 add.d $a0, $fp, $a0 ori $a2, $s0, 3328 xvldi $xr0, -1296 -.LBB5_717: # %vector.body3986 +.LBB5_717: # %vector.body4102 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -9198,7 +9058,7 @@ init: # @init ori $a0, $a0, 2500 add.d $a0, $fp, $a0 ori $a1, $s0, 3328 -.LBB5_719: # %vector.body3991 +.LBB5_719: # %vector.body4107 # =>This Inner Loop Header: Depth=1 xvld $xr0, $sp, 16 # 32-byte Folded Reload xvst $xr0, $a0, -32 @@ -9207,238 +9067,225 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_719 b .LBB5_573 -.LBB5_720: # %vector.body3946.preheader +.LBB5_720: # %vector.body4044.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_721: # %vector.body3946 +.LBB5_721: # %vector.body4044 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_721 -# %bb.722: # %vector.body3951.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.722: # %vector.body4049.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_723: # %vector.body3951 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_723: # %vector.body4049 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_723 -# %bb.724: # %vector.body3956.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.724: # %vector.body4057.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_725: # %vector.body3956 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_725: # %vector.body4057 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_725 -# %bb.726: # %vector.body3961.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.726: # %vector.body4065.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_727: # %vector.body3961 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_727: # %vector.body4065 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_727 -# %bb.728: # %vector.body3966.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.728: # %vector.body4073.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_729: # %vector.body3966 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_729: # %vector.body4073 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_729 b .LBB5_573 -.LBB5_730: # %vector.body3931.preheader +.LBB5_730: # %vector.body4023.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_731: # %vector.body3931 +.LBB5_731: # %vector.body4023 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_731 -# %bb.732: # %vector.body3936.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.732: # %vector.body4028.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_733: # %vector.body3936 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_733: # %vector.body4028 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_733 -# %bb.734: # %vector.body3941.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.734: # %vector.body4036.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_735: # %vector.body3941 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_735: # %vector.body4036 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_735 b .LBB5_573 -.LBB5_736: # %vector.body3921.preheader +.LBB5_736: # %vector.body4007.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a2, $a0, $a1 - ori $a3, $zero, 2 + ori $a1, $a1, 1048 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + add.d $a3, $a0, $a1 lu12i.w $a1, 7 ori $a4, $a1, 3328 -.LBB5_737: # %vector.body3921 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_737: # %vector.body4007 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_737 -# %bb.738: # %vector.body3926.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.738: # %vector.body4015.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_739: # %vector.body3926 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_739: # %vector.body4015 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_739 b .LBB5_573 .LBB5_740: # %.preheader.i2608.preheader @@ -9455,14 +9302,14 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_741: # %vector.body3911 +.LBB5_741: # %vector.body3997 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_741 -# %bb.742: # %vector.body3916.preheader +# %bb.742: # %vector.body4002.preheader lu12i.w $a1, 62 ori $a1, $a1, 2128 add.d $a1, $fp, $a1 @@ -9470,7 +9317,7 @@ init: # @init lu12i.w $a2, 219235 ori $a2, $a2, 1981 xvreplgr2vr.w $xr0, $a2 -.LBB5_743: # %vector.body3916 +.LBB5_743: # %vector.body4002 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -9488,14 +9335,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_745: # %vector.body3904 +.LBB5_745: # %vector.body3990 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9527,248 +9374,237 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_745 b .LBB5_573 -.LBB5_746: # %vector.body3879.preheader +.LBB5_746: # %vector.body3959.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_747: # %vector.body3879 +.LBB5_747: # %vector.body3959 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_747 -# %bb.748: # %vector.body3884.preheader +# %bb.748: # %vector.body3964.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_749: # %vector.body3884 +.LBB5_749: # %vector.body3964 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_749 -# %bb.750: # %vector.body3889.preheader +# %bb.750: # %vector.body3969.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 -.LBB5_751: # %vector.body3889 +.LBB5_751: # %vector.body3969 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_751 -# %bb.752: # %vector.body3894.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.752: # %vector.body3974.preheader + lu12i.w $a3, 93 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 3160 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_753: # %vector.body3894 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_753: # %vector.body3974 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_753 -# %bb.754: # %vector.body3899.preheader - lu12i.w $a2, 125 - ori $a2, $a2, 132 +# %bb.754: # %vector.body3982.preheader + lu12i.w $a3, 125 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 136 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_755: # %vector.body3899 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_755: # %vector.body3982 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_755 b .LBB5_573 -.LBB5_756: # %vector.body3859.preheader +.LBB5_756: # %vector.body3930.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_757: # %vector.body3859 +.LBB5_757: # %vector.body3930 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_757 -# %bb.758: # %vector.body3864.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.758: # %vector.body3935.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_759: # %vector.body3864 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_759: # %vector.body3935 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_759 -# %bb.760: # %vector.body3869.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.760: # %vector.body3943.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 2104 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_761: # %vector.body3869 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_761: # %vector.body3943 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_761 -# %bb.762: # %vector.body3874.preheader - lu12i.w $a2, 93 - ori $a2, $a2, 3156 +# %bb.762: # %vector.body3951.preheader + lu12i.w $a3, 93 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 3160 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_763: # %vector.body3874 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_763: # %vector.body3951 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_763 b .LBB5_573 -.LBB5_764: # %vector.body3844.preheader +.LBB5_764: # %vector.body3909.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_765: # %vector.body3844 +.LBB5_765: # %vector.body3909 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_765 -# %bb.766: # %vector.body3849.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 - ori $a4, $a1, 3328 -.LBB5_767: # %vector.body3849 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_767 -# %bb.768: # %vector.body3854.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_769: # %vector.body3854 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.766: # %vector.body3914.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 + ori $a4, $a1, 3328 +.LBB5_767: # %vector.body3914 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_767 +# %bb.768: # %vector.body3922.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_769: # %vector.body3922 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_769 b .LBB5_573 .LBB5_770: # %.preheader.i2684.preheader @@ -9785,103 +9621,99 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_771: # %vector.body3829 +.LBB5_771: # %vector.body3888 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_771 -# %bb.772: # %vector.body3834.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.772: # %vector.body3893.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_773: # %vector.body3834 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_773: # %vector.body3893 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_773 -# %bb.774: # %vector.body3839.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.774: # %vector.body3901.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_775: # %vector.body3839 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_775: # %vector.body3901 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_775 b .LBB5_573 -.LBB5_776: # %vector.body3819.preheader +.LBB5_776: # %vector.body3875.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 32 - lu12i.w $a2, 7 - ori $a3, $a2, 3328 + addi.d $a2, $a0, 32 + lu12i.w $a1, 7 + ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_777: # %vector.body3819 +.LBB5_777: # %vector.body3875 # =>This Inner Loop Header: Depth=1 - xvst $xr0, $a1, -32 - xvst $xr0, $a1, 0 + xvst $xr0, $a2, -32 + xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 - addi.d $a1, $a1, 64 + addi.d $a2, $a2, 64 bnez $a3, .LBB5_777 -# %bb.778: # %vector.body3824.preheader - lu12i.w $a1, 31 - ori $a1, $a1, 1044 - add.d $a0, $a0, $a1 - ori $a1, $zero, 2 - ori $a2, $a2, 3328 -.LBB5_779: # %vector.body3824 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_779 +# %bb.778: # %vector.body3880.preheader + lu12i.w $a2, 31 + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr0, $a3, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 1048 + add.d $a0, $a0, $a2 + ori $a1, $a1, 3328 +.LBB5_779: # %vector.body3880 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_779 b .LBB5_573 .LBB5_780: # %.preheader.i2711.preheader pcalau12i $a0, %pc_hi20(global_data) @@ -9897,23 +9729,23 @@ init: # @init lu12i.w $a1, 7 ori $a2, $a1, 3328 xvldi $xr0, -1424 -.LBB5_781: # %vector.body3807 +.LBB5_781: # %vector.body3863 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 addi.d $a2, $a2, -16 addi.d $a0, $a0, 64 bnez $a2, .LBB5_781 -# %bb.782: # %vector.body3812.preheader +# %bb.782: # %vector.body3868.preheader lu12i.w $a0, 62 - pcalau12i $a2, %pc_hi20(.LCPI5_0) - xvld $xr0, $a2, %pc_lo12(.LCPI5_0) pcalau12i $a2, %pc_hi20(.LCPI5_1) - xvld $xr1, $a2, %pc_lo12(.LCPI5_1) + xvld $xr0, $a2, %pc_lo12(.LCPI5_1) + pcalau12i $a2, %pc_hi20(.LCPI5_2) + xvld $xr1, $a2, %pc_lo12(.LCPI5_2) ori $a0, $a0, 2096 add.d $a0, $fp, $a0 ori $a1, $a1, 3328 -.LBB5_783: # %vector.body3812 +.LBB5_783: # %vector.body3868 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -9959,142 +9791,135 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_785: # %vector.body3792 +.LBB5_785: # %vector.body3842 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_785 -# %bb.786: # %vector.body3797.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.786: # %vector.body3847.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_787: # %vector.body3797 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_787: # %vector.body3847 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_787 -# %bb.788: # %vector.body3802.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.788: # %vector.body3855.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_789: # %vector.body3802 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_789: # %vector.body3855 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_789 b .LBB5_573 -.LBB5_790: # %vector.body3782.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 - lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_791: # %vector.body3782 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_790: # %vector.body3826.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_791: # %vector.body3826 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_791 -# %bb.792: # %vector.body3787.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 - ori $a1, $a1, 3328 -.LBB5_793: # %vector.body3787 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +# %bb.792: # %vector.body3834.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_793: # %vector.body3834 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_793 b .LBB5_573 -.LBB5_794: # %vector.body3770.preheader +.LBB5_794: # %vector.body3811.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a1, $a0, 4 - ori $a2, $zero, 2 - lu12i.w $a3, 7 - ori $a3, $a3, 3328 -.LBB5_795: # %vector.body3770 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 - bnez $a3, .LBB5_795 + addi.d $a1, $a0, 8 + lu12i.w $a2, 7 + ori $a2, $a2, 3328 +.LBB5_795: # %vector.body3811 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a2, $a2, -4 + addi.d $a1, $a1, 16 + bnez $a2, .LBB5_795 # %bb.796: # %.preheader34.i2757.preheader lu12i.w $a1, 156 ori $a1, $a1, 1728 @@ -10160,127 +9985,121 @@ init: # @init lu12i.w $a0, 7 ori $a2, $a0, 3328 xvldi $xr0, -1424 -.LBB5_799: # %vector.body3755 +.LBB5_799: # %vector.body3790 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 addi.d $a2, $a2, -16 addi.d $a1, $a1, 64 bnez $a2, .LBB5_799 -# %bb.800: # %vector.body3760.preheader - lu12i.w $a1, 62 - ori $a1, $a1, 2100 - add.d $a1, $fp, $a1 - ori $a2, $zero, 2 +# %bb.800: # %vector.body3795.preheader + lu12i.w $a2, 62 + pcalau12i $a1, %pc_hi20(.LCPI5_0) + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a2, $a2, 2104 + add.d $a2, $fp, $a2 ori $a3, $a0, 3328 -.LBB5_801: # %vector.body3760 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a2, 31, 0 - addi.d $a5, $a2, -1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa0, $a5 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a4 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a3, $a3, -2 - addi.w $a2, $a2, 2 +.LBB5_801: # %vector.body3795 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a2, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 16 bnez $a3, .LBB5_801 -# %bb.802: # %vector.body3765.preheader - lu12i.w $a1, 93 - ori $a1, $a1, 3156 +# %bb.802: # %vector.body3803.preheader + lu12i.w $a2, 93 + vld $vr0, $a1, %pc_lo12(.LCPI5_0) + ori $a1, $a2, 3160 add.d $a1, $fp, $a1 - ori $a2, $zero, 2 ori $a0, $a0, 3328 -.LBB5_803: # %vector.body3765 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a1, -4 - fst.s $fa1, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, -2 - addi.w $a2, $a2, 2 +.LBB5_803: # %vector.body3803 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a1, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a0, $a0, -4 + addi.d $a1, $a1, 16 bnez $a0, .LBB5_803 b .LBB5_573 -.LBB5_804: # %vector.body3740.preheader +.LBB5_804: # %vector.body3769.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_805: # %vector.body3740 +.LBB5_805: # %vector.body3769 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_805 -# %bb.806: # %vector.body3745.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.806: # %vector.body3774.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_807: # %vector.body3745 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_807: # %vector.body3774 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_807 -# %bb.808: # %vector.body3750.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.808: # %vector.body3782.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_809: # %vector.body3750 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_809: # %vector.body3782 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_809 b .LBB5_573 .LBB5_810: # %.preheader.i2797.preheader @@ -10293,14 +10112,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_811: # %vector.body3733 +.LBB5_811: # %vector.body3762 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10342,14 +10161,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_813: # %vector.body3726 +.LBB5_813: # %vector.body3755 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10391,14 +10210,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_815: # %vector.body3719 +.LBB5_815: # %vector.body3748 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10440,14 +10259,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_817: # %vector.body3712 +.LBB5_817: # %vector.body3741 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10489,14 +10308,14 @@ init: # @init pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $s0, 1040 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - xvld $xr0, $a1, %pc_lo12(.LCPI5_0) pcalau12i $a1, %pc_hi20(.LCPI5_1) - xvld $xr1, $a1, %pc_lo12(.LCPI5_1) + xvld $xr0, $a1, %pc_lo12(.LCPI5_1) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + xvld $xr1, $a1, %pc_lo12(.LCPI5_2) add.d $a0, $fp, $a0 lu12i.w $a1, 7 ori $a1, $a1, 3328 -.LBB5_819: # %vector.body3705 +.LBB5_819: # %vector.body3734 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10528,26 +10347,26 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_819 b .LBB5_573 -.LBB5_820: # %vector.body3695.preheader +.LBB5_820: # %vector.body3724.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_821: # %vector.body3695 +.LBB5_821: # %vector.body3724 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_821 -# %bb.822: # %vector.body3700.preheader +# %bb.822: # %vector.body3729.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_823: # %vector.body3700 +.LBB5_823: # %vector.body3729 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10555,95 +10374,92 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_823 b .LBB5_573 -.LBB5_824: # %vector.body3680.preheader +.LBB5_824: # %vector.body3703.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_825: # %vector.body3680 +.LBB5_825: # %vector.body3703 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_825 -# %bb.826: # %vector.body3685.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a2, $a0, $a2 - ori $a3, $zero, 2 +# %bb.826: # %vector.body3708.preheader + lu12i.w $a3, 31 + pcalau12i $a2, %pc_hi20(.LCPI5_0) + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a3, $a3, 1048 + add.d $a3, $a0, $a3 ori $a4, $a1, 3328 -.LBB5_827: # %vector.body3685 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 +.LBB5_827: # %vector.body3708 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 bnez $a4, .LBB5_827 -# %bb.828: # %vector.body3690.preheader - lu12i.w $a2, 62 - ori $a2, $a2, 2100 +# %bb.828: # %vector.body3716.preheader + lu12i.w $a3, 62 + vld $vr0, $a2, %pc_lo12(.LCPI5_0) + ori $a2, $a3, 2104 add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_829: # %vector.body3690 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_829: # %vector.body3716 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_829 b .LBB5_573 -.LBB5_830: # %vector.body3668.preheader +.LBB5_830: # %vector.body3691.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_831: # %vector.body3668 +.LBB5_831: # %vector.body3691 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_831 -# %bb.832: # %vector.body3673.preheader +# %bb.832: # %vector.body3696.preheader lu12i.w $a2, 31 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - xvld $xr0, $a3, %pc_lo12(.LCPI5_0) pcalau12i $a3, %pc_hi20(.LCPI5_1) - xvld $xr1, $a3, %pc_lo12(.LCPI5_1) + xvld $xr0, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_2) + xvld $xr1, $a3, %pc_lo12(.LCPI5_2) ori $a2, $a2, 1040 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 -.LBB5_833: # %vector.body3673 +.LBB5_833: # %vector.body3696 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10675,16 +10491,16 @@ init: # @init addi.d $a0, $a0, 32 bnez $a1, .LBB5_833 b .LBB5_573 -.LBB5_834: # %vector.body3653.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - xvld $xr0, $a0, %pc_lo12(.LCPI5_0) +.LBB5_834: # %vector.body3674.preheader pcalau12i $a0, %pc_hi20(.LCPI5_1) - xvld $xr1, $a0, %pc_lo12(.LCPI5_1) + xvld $xr0, $a0, %pc_lo12(.LCPI5_1) + pcalau12i $a0, %pc_hi20(.LCPI5_2) + xvld $xr1, $a0, %pc_lo12(.LCPI5_2) pcalau12i $a0, %pc_hi20(global_data) addi.d $a1, $a0, %pc_lo12(global_data) lu12i.w $a0, 7 ori $a2, $a0, 3328 -.LBB5_835: # %vector.body3653 +.LBB5_835: # %vector.body3674 # =>This Inner Loop Header: Depth=1 xvaddi.du $xr2, $xr1, 1 xvaddi.du $xr3, $xr0, 1 @@ -10715,7 +10531,7 @@ init: # @init addi.d $a2, $a2, -8 addi.d $a1, $a1, 32 bnez $a2, .LBB5_835 -# %bb.836: # %vector.body3658.preheader +# %bb.836: # %vector.body3681.preheader pcalau12i $a1, %pc_hi20(global_data) addi.d $a1, $a1, %pc_lo12(global_data) lu12i.w $a2, 31 @@ -10723,20 +10539,20 @@ init: # @init add.d $a2, $a1, $a2 ori $a3, $a0, 3328 xvldi $xr0, -1424 -.LBB5_837: # %vector.body3658 +.LBB5_837: # %vector.body3681 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_837 -# %bb.838: # %vector.body3663.preheader +# %bb.838: # %vector.body3686.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a1, $a1, $a2 ori $a0, $a0, 3328 xvldi $xr0, -1296 -.LBB5_839: # %vector.body3663 +.LBB5_839: # %vector.body3686 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a1, -32 xvst $xr0, $a1, 0 @@ -10744,40 +10560,40 @@ init: # @init addi.d $a1, $a1, 64 bnez $a0, .LBB5_839 b .LBB5_573 -.LBB5_840: # %vector.body3638.preheader +.LBB5_840: # %vector.body3659.preheader pcalau12i $a0, %pc_hi20(global_data) addi.d $a0, $a0, %pc_lo12(global_data) addi.d $a2, $a0, 32 lu12i.w $a1, 7 ori $a3, $a1, 3328 xvldi $xr0, -1424 -.LBB5_841: # %vector.body3638 +.LBB5_841: # %vector.body3659 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_841 -# %bb.842: # %vector.body3643.preheader +# %bb.842: # %vector.body3664.preheader lu12i.w $a2, 31 ori $a2, $a2, 1072 add.d $a2, $a0, $a2 ori $a3, $a1, 3328 xvldi $xr0, -3264 -.LBB5_843: # %vector.body3643 +.LBB5_843: # %vector.body3664 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a2, -32 xvst $xr0, $a2, 0 addi.d $a3, $a3, -16 addi.d $a2, $a2, 64 bnez $a3, .LBB5_843 -# %bb.844: # %vector.body3648.preheader +# %bb.844: # %vector.body3669.preheader lu12i.w $a2, 62 ori $a2, $a2, 2128 add.d $a0, $a0, $a2 ori $a1, $a1, 3328 xvldi $xr0, -3265 -.LBB5_845: # %vector.body3648 +.LBB5_845: # %vector.body3669 # =>This Inner Loop Header: Depth=1 xvst $xr0, $a0, -32 xvst $xr0, $a0, 0 @@ -10785,82 +10601,78 @@ init: # @init addi.d $a0, $a0, 64 bnez $a1, .LBB5_845 b .LBB5_573 -.LBB5_846: # %vector.body3633.preheader - pcalau12i $a0, %pc_hi20(global_data+4) - addi.d $a0, $a0, %pc_lo12(global_data+4) - ori $a1, $zero, 2 - lu12i.w $a2, 7 - ori $a2, $a2, 3328 -.LBB5_847: # %vector.body3633 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a1, 31, 0 - addi.d $a4, $a1, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a2, $a2, -2 - addi.w $a1, $a1, 2 - bnez $a2, .LBB5_847 - b .LBB5_573 -.LBB5_848: # %vector.body3623.preheader - pcalau12i $a0, %pc_hi20(global_data) - addi.d $a0, $a0, %pc_lo12(global_data) - addi.d $a2, $a0, 4 - ori $a3, $zero, 2 +.LBB5_846: # %vector.body3651.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a0, %pc_hi20(global_data+8) + addi.d $a0, $a0, %pc_lo12(global_data+8) lu12i.w $a1, 7 - ori $a4, $a1, 3328 -.LBB5_849: # %vector.body3623 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a5, $a3, 31, 0 - addi.d $a6, $a3, -1 - bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa0, $a6 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a2, -4 - fst.s $fa1, $a2, 0 - addi.d $a2, $a2, 8 - addi.d $a4, $a4, -2 - addi.w $a3, $a3, 2 - bnez $a4, .LBB5_849 -# %bb.850: # %vector.body3628.preheader - lu12i.w $a2, 31 - ori $a2, $a2, 1044 - add.d $a0, $a0, $a2 - ori $a2, $zero, 2 ori $a1, $a1, 3328 -.LBB5_851: # %vector.body3628 - # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a2, 31, 0 - addi.d $a4, $a2, -1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa0, $a4 - ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a3 - ffint.d.l $fa1, $fa1 - frecip.d $fa0, $fa0 - frecip.d $fa1, $fa1 - fcvt.s.d $fa0, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa0, $a0, -4 - fst.s $fa1, $a0, 0 - addi.d $a0, $a0, 8 - addi.d $a1, $a1, -2 - addi.w $a2, $a2, 2 +.LBB5_847: # %vector.body3651 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 + bnez $a1, .LBB5_847 + b .LBB5_573 +.LBB5_848: # %vector.body3635.preheader + pcalau12i $a0, %pc_hi20(.LCPI5_0) + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + pcalau12i $a1, %pc_hi20(global_data) + addi.d $a1, $a1, %pc_lo12(global_data) + addi.d $a3, $a1, 8 + lu12i.w $a2, 7 + ori $a4, $a2, 3328 +.LBB5_849: # %vector.body3635 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a3, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB5_849 +# %bb.850: # %vector.body3643.preheader + lu12i.w $a3, 31 + vld $vr0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $a3, 1048 + add.d $a0, $a1, $a0 + ori $a1, $a2, 3328 +.LBB5_851: # %vector.body3643 + # =>This Inner Loop Header: Depth=1 + vshuf4i.w $vr1, $vr0, 8 + vaddi.wu $vr2, $vr1, 1 + vaddi.wu $vr1, $vr1, 3 + vext2xv.du.wu $xr2, $xr2 + xvffint.d.lu $xr2, $xr2 + vext2xv.du.wu $xr1, $xr1 + xvffint.d.lu $xr1, $xr1 + vfrecip.d $vr2, $vr2 + vfrecip.d $vr1, $vr1 + vfcvt.s.d $vr1, $vr1, $vr2 + vst $vr1, $a0, -8 + vaddi.du $vr0, $vr0, 4 + addi.d $a1, $a1, -4 + addi.d $a0, $a0, 16 bnez $a1, .LBB5_851 b .LBB5_573 .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s index a16dffd..727befa 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s @@ -445,16 +445,25 @@ imp_match_init_strictH: # @imp_match_init_strictH .word 7 # 0x7 .word 8 # 0x8 .LCPI2_2: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI2_3: .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 .dword 8 # 0x8 -.LCPI2_3: +.LCPI2_4: .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI2_6: +.LCPI2_7: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -465,15 +474,15 @@ imp_match_init_strictH: # @imp_match_init_strictH .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI2_4: +.LCPI2_5: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI2_5: +.LCPI2_6: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI2_7: +.LCPI2_8: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -1412,71 +1421,52 @@ H__align: # @H__align bstrins.d $a3, $a5, 2, 0 pcalau12i $a5, %pc_hi20(.LCPI2_1) xvld $xr0, $a5, %pc_lo12(.LCPI2_1) - xvreplgr2vr.w $xr1, $a0 + pcalau12i $a5, %pc_hi20(.LCPI2_2) + xvld $xr1, $a5, %pc_lo12(.LCPI2_2) + xvreplgr2vr.w $xr2, $a0 addi.d $a5, $s1, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_43: # %vector.body933 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -1525,71 +1515,52 @@ H__align: # @H__align bstrins.d $a3, $a5, 2, 0 pcalau12i $a5, %pc_hi20(.LCPI2_1) xvld $xr0, $a5, %pc_lo12(.LCPI2_1) - xvreplgr2vr.w $xr1, $a0 + pcalau12i $a5, %pc_hi20(.LCPI2_2) + xvld $xr1, $a5, %pc_lo12(.LCPI2_2) + xvreplgr2vr.w $xr2, $a0 addi.d $a5, $s0, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_50: # %vector.body948 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -2015,12 +1986,12 @@ H__align: # @H__align ld.d $a0, $a0, %pc_lo12(impmtx) st.d $a0, $sp, 256 # 8-byte Folded Spill ld.d $a0, $sp, 40 # 8-byte Folded Reload - ld.d $s5, $a0, %pc_lo12(H__align.ijp) + ld.d $fp, $a0, %pc_lo12(H__align.ijp) pcalau12i $a0, %pc_hi20(H__align.mp) ld.d $a0, $a0, %pc_lo12(H__align.mp) st.d $a0, $sp, 248 # 8-byte Folded Spill ld.d $a0, $sp, 88 # 8-byte Folded Reload - ld.d $fp, $a0, %pc_lo12(H__align.gappat2) + ld.d $s3, $a0, %pc_lo12(H__align.gappat2) ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf1) st.d $a0, $sp, 240 # 8-byte Folded Spill @@ -2028,13 +1999,13 @@ H__align: # @H__align ld.d $a0, $a0, %pc_lo12(H__align.gappat1) st.d $a0, $sp, 232 # 8-byte Folded Spill ld.d $a0, $sp, 408 # 8-byte Folded Reload - ld.d $s0, $a0, %pc_lo12(H__align.diaf2) + ld.d $s5, $a0, %pc_lo12(H__align.diaf2) ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gapf1) st.d $a0, $sp, 224 # 8-byte Folded Spill fcvt.d.s $fs1, $ft2 ld.d $a0, $sp, 280 # 8-byte Folded Reload - ld.d $s3, $a0, %pc_lo12(H__align.gapf2) + ld.d $s0, $a0, %pc_lo12(H__align.gapf2) ori $a0, $zero, 0 lu32i.d $a0, -1 add.d $a0, $a4, $a0 @@ -2173,7 +2144,7 @@ H__align: # @H__align fmadd.s $fa0, $ft3, $fs3, $fa0 ld.d $a0, $sp, 232 # 8-byte Folded Reload ldx.d $s6, $a0, $a3 - ldx.d $a3, $s5, $a3 + ldx.d $a3, $fp, $a3 ld.d $a0, $sp, 240 # 8-byte Folded Reload alsl.d $a4, $s4, $a0, 2 ld.d $a0, $sp, 224 # 8-byte Folded Reload @@ -2210,7 +2181,7 @@ H__align: # @H__align # Child Loop BB2_145 Depth 3 # Child Loop BB2_147 Depth 4 slli.d $a0, $t0, 3 - ldx.d $t5, $fp, $a0 + ldx.d $t5, $s3, $a0 fld.s $fa4, $t5, 4 fld.s $fa1, $t4, 0 fcmp.ceq.s $fcc0, $fa4, $fs2 @@ -2225,7 +2196,7 @@ H__align: # @H__align # in Loop: Header=BB2_118 Depth=2 addi.d $a3, $a3, 4 slli.d $t6, $t0, 2 - fldx.s $fa2, $s0, $t6 + fldx.s $fa2, $s5, $t6 fld.s $fa6, $a5, 0 fld.s $fa3, $s6, 4 st.w $zero, $a3, 0 @@ -2298,7 +2269,7 @@ H__align: # @H__align fmov.s $fs0, $fa4 .LBB2_130: # in Loop: Header=BB2_118 Depth=2 fld.s $fa4, $a4, 0 - fldx.s $fa5, $s3, $t6 + fldx.s $fa5, $s0, $t6 addi.d $t3, $t3, 4 fcvt.d.s $fa4, $fa4 fcvt.d.s $fa5, $fa5 @@ -2311,7 +2282,7 @@ H__align: # @H__align # %bb.131: # in Loop: Header=BB2_118 Depth=2 ld.w $a0, $t3, 0 slli.d $a2, $a0, 3 - ldx.d $a2, $s5, $a2 + ldx.d $a2, $fp, $a2 fcvt.d.s $fa3, $fa3 alsl.d $a2, $t0, $a2, 2 ld.w $a2, $a2, -4 @@ -2551,19 +2522,29 @@ H__align: # @H__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI2_2) - xvld $xr1, $a5, %pc_lo12(.LCPI2_2) pcalau12i $a5, %pc_hi20(.LCPI2_3) - xvld $xr2, $a5, %pc_lo12(.LCPI2_3) - xvreplgr2vr.d $xr3, $s7 + xvld $xr1, $a5, %pc_lo12(.LCPI2_3) + pcalau12i $a5, %pc_hi20(.LCPI2_4) + xvld $xr2, $a5, %pc_lo12(.LCPI2_4) + pcalau12i $a5, %pc_hi20(.LCPI2_2) + xvld $xr3, $a5, %pc_lo12(.LCPI2_2) + xvreplgr2vr.d $xr4, $s7 addi.d $a5, $s1, 4 - xvldi $xr4, -800 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_170: # %vector.body1056 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr3, $xr1 - xvsub.d $xr6, $xr3, $xr2 + xvsub.d $xr6, $xr4, $xr1 + xvsub.d $xr7, $xr4, $xr2 + xvpickve2gr.d $a7, $xr7, 0 + vinsgr2vr.w $vr8, $a7, 0 + xvpickve2gr.d $a7, $xr7, 1 + vinsgr2vr.w $vr8, $a7, 1 + xvpickve2gr.d $a7, $xr7, 2 + vinsgr2vr.w $vr8, $a7, 2 + xvpickve2gr.d $a7, $xr7, 3 + vinsgr2vr.w $vr8, $a7, 3 xvpickve2gr.d $a7, $xr6, 0 vinsgr2vr.w $vr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -2572,73 +2553,44 @@ H__align: # @H__align vinsgr2vr.w $vr7, $a7, 2 xvpickve2gr.d $a7, $xr6, 3 vinsgr2vr.w $vr7, $a7, 3 - xvpickve2gr.d $a7, $xr5, 0 - vinsgr2vr.w $vr6, $a7, 0 - xvpickve2gr.d $a7, $xr5, 1 - vinsgr2vr.w $vr6, $a7, 1 - xvpickve2gr.d $a7, $xr5, 2 - vinsgr2vr.w $vr6, $a7, 2 - xvpickve2gr.d $a7, $xr5, 3 - vinsgr2vr.w $vr6, $a7, 3 - xvpermi.q $xr7, $xr6, 2 - xvmul.w $xr5, $xr0, $xr7 - xvpermi.q $xr6, $xr5, 1 + xvpermi.q $xr8, $xr7, 2 + xvmul.w $xr6, $xr0, $xr8 + xvpermi.q $xr7, $xr6, 1 + vext2xv.d.w $xr7, $xr7 + xvffint.d.l $xr7, $xr7 vext2xv.d.w $xr6, $xr6 + xvld $xr8, $a5, 0 xvffint.d.l $xr6, $xr6 - vext2xv.d.w $xr5, $xr5 - xvld $xr7, $a5, 0 - xvffint.d.l $xr5, $xr5 - xvfmul.d $xr5, $xr5, $xr4 - xvfmul.d $xr6, $xr6, $xr4 - xvpermi.q $xr8, $xr7, 1 - vreplvei.w $vr9, $vr8, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr8, 2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvpermi.q $xr9, $xr8, 1 + vreplvei.w $vr10, $vr9, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr9, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr9, 1 fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr8, 1 + vreplvei.w $vr9, $vr9, 0 fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr10, 16 + xvpermi.q $xr9, $xr11, 2 + vreplvei.w $vr10, $vr8, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr8, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr8, 1 + fcvt.d.s $ft2, $ft2 vreplvei.w $vr8, $vr8, 0 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr9, 16 - xvpermi.q $xr8, $xr10, 2 - vreplvei.w $vr9, $vr7, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr7, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr7, 1 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr9, 16 - xvpermi.q $xr7, $xr10, 2 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr8, $xr11, 2 + xvfadd.d $xr7, $xr9, $xr7 xvfadd.d $xr6, $xr8, $xr6 - xvfadd.d $xr5, $xr7, $xr5 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $a5, 0 + xvfcvt.s.d $xr6, $xr7, $xr6 + xvperm.w $xr6, $xr6, $xr3 + xvst $xr6, $a5, 0 xvaddi.du $xr2, $xr2, 8 xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 @@ -2694,8 +2646,8 @@ H__align: # @H__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - pcalau12i $a5, %pc_hi20(.LCPI2_4) - vld $vr3, $a5, %pc_lo12(.LCPI2_4) + pcalau12i $a5, %pc_hi20(.LCPI2_5) + vld $vr3, $a5, %pc_lo12(.LCPI2_5) xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 xvldi $xr5, -800 @@ -2720,18 +2672,9 @@ H__align: # @H__align vextrins.d $vr7, $vr8, 16 xvpermi.q $xr7, $xr9, 2 xvfmadd.d $xr6, $xr4, $xr6, $xr7 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - vst $vr8, $a5, 0 + xvpermi.q $xr7, $xr6, 1 + vfcvt.s.d $vr6, $vr7, $vr6 + vst $vr6, $a5, 0 vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 @@ -2931,8 +2874,8 @@ H__align: # @H__align move $a3, $zero b .LBB2_221 .LBB2_210: # %vector.ph1181 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - vld $vr0, $a4, %pc_lo12(.LCPI2_5) + pcalau12i $a4, %pc_hi20(.LCPI2_6) + vld $vr0, $a4, %pc_lo12(.LCPI2_6) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s3, 16 @@ -2984,8 +2927,8 @@ H__align: # @H__align move $a4, $zero b .LBB2_237 .LBB2_218: # %vector.ph1087 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - vld $vr0, $a4, %pc_lo12(.LCPI2_5) + pcalau12i $a4, %pc_hi20(.LCPI2_6) + vld $vr0, $a4, %pc_lo12(.LCPI2_6) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $ra, 16 @@ -3053,8 +2996,8 @@ H__align: # @H__align andi $a6, $a5, 12 bstrpick.d $a4, $a5, 31, 4 slli.d $a4, $a4, 4 - pcalau12i $a7, %pc_hi20(.LCPI2_6) - xvld $xr0, $a7, %pc_lo12(.LCPI2_6) + pcalau12i $a7, %pc_hi20(.LCPI2_7) + xvld $xr0, $a7, %pc_lo12(.LCPI2_7) addi.d $a7, $a2, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -3076,8 +3019,8 @@ H__align: # @H__align beqz $a6, .LBB2_237 .LBB2_234: # %vec.epilog.ph1216 move $a6, $a4 - pcalau12i $a4, %pc_hi20(.LCPI2_7) - vld $vr0, $a4, %pc_lo12(.LCPI2_7) + pcalau12i $a4, %pc_hi20(.LCPI2_8) + vld $vr0, $a4, %pc_lo12(.LCPI2_8) bstrpick.d $a4, $a5, 31, 2 slli.d $a4, $a4, 2 vreplgr2vr.w $vr1, $a6 @@ -3359,8 +3302,8 @@ H__align: # @H__align andi $a7, $a6, 12 bstrpick.d $a5, $a6, 31, 4 slli.d $a5, $a5, 4 - pcalau12i $t0, %pc_hi20(.LCPI2_6) - xvld $xr0, $t0, %pc_lo12(.LCPI2_6) + pcalau12i $t0, %pc_hi20(.LCPI2_7) + xvld $xr0, $t0, %pc_lo12(.LCPI2_7) addi.d $t0, $a3, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -3382,8 +3325,8 @@ H__align: # @H__align beqz $a7, .LBB2_283 .LBB2_280: # %vec.epilog.ph1121 move $a7, $a5 - pcalau12i $a5, %pc_hi20(.LCPI2_7) - vld $vr0, $a5, %pc_lo12(.LCPI2_7) + pcalau12i $a5, %pc_hi20(.LCPI2_8) + vld $vr0, $a5, %pc_lo12(.LCPI2_8) bstrpick.d $a5, $a6, 31, 2 slli.d $a5, $a5, 2 vreplgr2vr.w $vr1, $a7 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s index 3842ba2..46f0d9b 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s @@ -1,6 +1,17 @@ .file "Lalignmm.c" + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 # -- Begin function Lalignmm_hmout +.LCPI0_0: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 .text - .globl Lalignmm_hmout # -- Begin function Lalignmm_hmout + .globl Lalignmm_hmout .p2align 5 .type Lalignmm_hmout,@function Lalignmm_hmout: # @Lalignmm_hmout @@ -18,23 +29,23 @@ Lalignmm_hmout: # @Lalignmm_hmout st.d $s7, $sp, 480 # 8-byte Folded Spill st.d $s8, $sp, 472 # 8-byte Folded Spill fst.d $fs0, $sp, 464 # 8-byte Folded Spill - move $s6, $a7 + move $s4, $a7 move $s8, $a5 - move $s4, $a4 + move $s5, $a4 move $s1, $a3 move $s2, $a2 move $fp, $a1 move $s0, $a0 pcalau12i $a0, %got_pc_hi20(RNApenalty) ld.d $a0, $a0, %got_pc_lo12(RNApenalty) - ld.w $s5, $a0, 0 + ld.w $s6, $a0, 0 ld.d $a0, $s0, 0 pcaddu18i $ra, %call36(seqlen) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 pcaddu18i $ra, %call36(seqlen) jirl $ra, $ra, 0 - st.d $s0, $sp, 128 # 8-byte Folded Spill + st.d $s0, $sp, 120 # 8-byte Folded Spill ld.d $a0, $s0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 @@ -47,16 +58,16 @@ Lalignmm_hmout: # @Lalignmm_hmout move $s0, $a0 add.d $a0, $s7, $a0 addi.w $fp, $a0, 200 - move $a0, $s4 + move $a0, $s5 move $a1, $fp pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill move $a0, $s8 move $a1, $fp pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill ori $a0, $zero, 4 move $a1, $zero pcaddu18i $ra, %call36(AllocateFloatMtx) @@ -67,12 +78,12 @@ Lalignmm_hmout: # @Lalignmm_hmout pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 st.d $a0, $sp, 232 # 8-byte Folded Spill - st.d $s0, $sp, 112 # 8-byte Folded Spill + st.d $s0, $sp, 104 # 8-byte Folded Spill addi.w $s0, $s0, 102 move $a0, $s0 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -80,7 +91,7 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a0, $s0 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 224 # 8-byte Folded Spill + st.d $a0, $sp, 216 # 8-byte Folded Spill ori $a1, $zero, 27 move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatMtx) @@ -91,34 +102,34 @@ Lalignmm_hmout: # @Lalignmm_hmout pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 st.d $a0, $sp, 376 # 8-byte Folded Spill - st.d $s7, $sp, 168 # 8-byte Folded Spill + st.d $s7, $sp, 160 # 8-byte Folded Spill addi.w $a0, $s7, 0 st.d $a0, $sp, 440 # 8-byte Folded Spill - st.d $s4, $sp, 120 # 8-byte Folded Spill - blez $s4, .LBB0_4 + st.d $s5, $sp, 112 # 8-byte Folded Spill + blez $s5, .LBB0_4 # %bb.1: # %.lr.ph move $fp, $zero - ld.d $s4, $sp, 120 # 8-byte Folded Reload - ld.d $s7, $sp, 128 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s7, $sp, 120 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_2: # =>This Inner Loop Header: Depth=1 ld.d $a0, $s7, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 ld.d $a1, $sp, 440 # 8-byte Folded Reload - bne $a0, $a1, .LBB0_254 + bne $a0, $a1, .LBB0_255 # %bb.3: # in Loop: Header=BB0_2 Depth=1 addi.w $fp, $fp, 1 - addi.d $s4, $s4, -1 + addi.d $s5, $s5, -1 addi.d $s7, $s7, 8 - bnez $s4, .LBB0_2 + bnez $s5, .LBB0_2 .LBB0_4: # %.preheader184 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload addi.w $fp, $a0, 0 st.d $s8, $sp, 88 # 8-byte Folded Spill blez $s8, .LBB0_8 # %bb.5: # %.lr.ph191 - move $s4, $zero + move $s5, $zero ld.d $s7, $sp, 88 # 8-byte Folded Reload ld.d $s8, $sp, 80 # 8-byte Folded Reload .p2align 4, , 16 @@ -126,24 +137,24 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $a0, $s8, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - bne $a0, $fp, .LBB0_255 + bne $a0, $fp, .LBB0_256 # %bb.7: # in Loop: Header=BB0_6 Depth=1 - addi.w $s4, $s4, 1 + addi.w $s5, $s5, 1 addi.d $s7, $s7, -1 addi.d $s8, $s8, 8 bnez $s7, .LBB0_6 .LBB0_8: # %._crit_edge st.d $s3, $sp, 456 # 8-byte Folded Spill - movgr2fr.w $fa0, $s5 + movgr2fr.w $fa0, $s6 ffint.s.w $fs0, $fa0 - ld.d $s4, $sp, 128 # 8-byte Folded Reload - move $a0, $s4 + ld.d $s5, $sp, 120 # 8-byte Folded Reload + move $a0, $s5 ld.d $a1, $sp, 384 # 8-byte Folded Reload move $a2, $s2 ld.d $s3, $sp, 440 # 8-byte Folded Reload move $a3, $s3 - ld.d $s5, $sp, 120 # 8-byte Folded Reload - move $a4, $s5 + ld.d $s6, $sp, 112 # 8-byte Folded Reload + move $a4, $s6 pcaddu18i $ra, %call36(MScpmx_calc_new) jirl $ra, $ra, 0 ld.d $s8, $sp, 80 # 8-byte Folded Reload @@ -155,58 +166,54 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a4, $s7 pcaddu18i $ra, %call36(MScpmx_calc_new) jirl $ra, $ra, 0 - beqz $s6, .LBB0_10 + beqz $s4, .LBB0_10 # %bb.9: - ld.d $s4, $sp, 576 - ld.d $s5, $sp, 560 + ld.d $s5, $sp, 576 + ld.d $s6, $sp, 560 ld.d $a0, $sp, 232 # 8-byte Folded Reload - ld.d $a1, $sp, 120 # 8-byte Folded Reload - ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload move $a3, $s2 move $a4, $s3 - move $a5, $s6 + move $a5, $s4 pcaddu18i $ra, %call36(new_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload move $a1, $s7 move $a2, $s8 move $a3, $s1 move $a4, $fp - move $a5, $s5 - ld.d $s5, $sp, 120 # 8-byte Folded Reload + move $a5, $s6 + ld.d $s6, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(new_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 392 # 8-byte Folded Reload - move $a1, $s5 - ld.d $a2, $sp, 128 # 8-byte Folded Reload + move $a1, $s6 + ld.d $a2, $sp, 120 # 8-byte Folded Reload move $a3, $s2 move $a4, $s3 - move $a5, $s4 + move $a5, $s5 pcaddu18i $ra, %call36(new_FinalGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload move $a1, $s7 move $a2, $s8 move $a3, $s1 move $a4, $fp - move $a5, $s4 - ld.d $s4, $sp, 128 # 8-byte Folded Reload + move $a5, $s5 + ld.d $s5, $sp, 120 # 8-byte Folded Reload pcaddu18i $ra, %call36(new_FinalGapCount) jirl $ra, $ra, 0 - fcvt.d.s $fa0, $fs0 - ld.d $a5, $sp, 224 # 8-byte Folded Reload - ld.d $s1, $sp, 112 # 8-byte Folded Reload - bgtz $s3, .LBB0_11 - b .LBB0_17 + b .LBB0_11 .LBB0_10: ld.d $a0, $sp, 232 # 8-byte Folded Reload - move $a1, $s5 - move $a2, $s4 + move $a1, $s6 + move $a2, $s5 move $a3, $s2 move $a4, $s3 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload move $a1, $s7 move $a2, $s8 move $a3, $s1 @@ -214,39 +221,41 @@ Lalignmm_hmout: # @Lalignmm_hmout pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 392 # 8-byte Folded Reload - move $a1, $s5 - move $a2, $s4 + move $a1, $s6 + move $a2, $s5 move $a3, $s2 move $a4, $s3 pcaddu18i $ra, %call36(st_FinalGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload move $a1, $s7 move $a2, $s8 move $a3, $s1 move $a4, $fp pcaddu18i $ra, %call36(st_FinalGapCount) jirl $ra, $ra, 0 +.LBB0_11: + ld.d $s4, $sp, 56 # 8-byte Folded Reload fcvt.d.s $fa0, $fs0 - ld.d $a5, $sp, 224 # 8-byte Folded Reload - ld.d $s1, $sp, 112 # 8-byte Folded Reload - blez $s3, .LBB0_17 -.LBB0_11: # %.lr.ph194 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a5, $sp, 216 # 8-byte Folded Reload + ld.d $s1, $sp, 104 # 8-byte Folded Reload + blez $s3, .LBB0_18 +# %bb.12: # %.lr.ph194 + ld.d $a0, $sp, 160 # 8-byte Folded Reload bstrpick.d $a0, $a0, 30, 0 ori $a1, $zero, 8 - bltu $a0, $a1, .LBB0_14 -# %bb.12: # %vector.memcheck - ld.d $a3, $sp, 392 # 8-byte Folded Reload - alsl.d $a1, $a0, $a3, 2 - ld.d $a2, $sp, 232 # 8-byte Folded Reload - bgeu $a2, $a1, .LBB0_67 + bltu $a0, $a1, .LBB0_15 # %bb.13: # %vector.memcheck - alsl.d $a1, $a0, $a2, 2 - bgeu $a3, $a1, .LBB0_67 -.LBB0_14: + ld.d $a4, $sp, 392 # 8-byte Folded Reload + alsl.d $a1, $a0, $a4, 2 + ld.d $a3, $sp, 232 # 8-byte Folded Reload + bgeu $a3, $a1, .LBB0_68 +# %bb.14: # %vector.memcheck + alsl.d $a1, $a0, $a3, 2 + bgeu $a4, $a1, .LBB0_68 +.LBB0_15: move $a1, $zero -.LBB0_15: # %scalar.ph.preheader +.LBB0_16: # %scalar.ph.preheader ld.d $a2, $sp, 392 # 8-byte Folded Reload alsl.d $a2, $a1, $a2, 2 ld.d $a3, $sp, 232 # 8-byte Folded Reload @@ -255,7 +264,7 @@ Lalignmm_hmout: # @Lalignmm_hmout vldi $vr1, -912 vldi $vr2, -928 .p2align 4, , 16 -.LBB0_16: # %scalar.ph +.LBB0_17: # %scalar.ph # =>This Inner Loop Header: Depth=1 fld.s $fa3, $a3, 0 fcvt.d.s $fa3, $fa3 @@ -274,31 +283,31 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a2, $a2, 4 addi.d $a0, $a0, -1 addi.d $a3, $a3, 4 - bnez $a0, .LBB0_16 -.LBB0_17: # %.preheader183 - blez $fp, .LBB0_24 -# %bb.18: # %.lr.ph196 + bnez $a0, .LBB0_17 +.LBB0_18: # %.preheader183 + blez $fp, .LBB0_25 +# %bb.19: # %.lr.ph196 bstrpick.d $a0, $s1, 30, 0 ori $a1, $zero, 8 - bltu $a0, $a1, .LBB0_21 -# %bb.19: # %vector.memcheck258 - alsl.d $a1, $a0, $a5, 2 - ld.d $a2, $sp, 160 # 8-byte Folded Reload - bgeu $a2, $a1, .LBB0_70 + bltu $a0, $a1, .LBB0_22 # %bb.20: # %vector.memcheck258 - alsl.d $a1, $a0, $a2, 2 - bgeu $a5, $a1, .LBB0_70 -.LBB0_21: + alsl.d $a1, $a0, $a5, 2 + ld.d $a3, $sp, 152 # 8-byte Folded Reload + bgeu $a3, $a1, .LBB0_71 +# %bb.21: # %vector.memcheck258 + alsl.d $a1, $a0, $a3, 2 + bgeu $a5, $a1, .LBB0_71 +.LBB0_22: move $a1, $zero -.LBB0_22: # %scalar.ph264.preheader +.LBB0_23: # %scalar.ph264.preheader alsl.d $a2, $a1, $a5, 2 - ld.d $a3, $sp, 160 # 8-byte Folded Reload + ld.d $a3, $sp, 152 # 8-byte Folded Reload alsl.d $a3, $a1, $a3, 2 sub.d $a0, $a0, $a1 vldi $vr1, -912 vldi $vr2, -928 .p2align 4, , 16 -.LBB0_23: # %scalar.ph264 +.LBB0_24: # %scalar.ph264 # =>This Inner Loop Header: Depth=1 fld.s $fa3, $a3, 0 fcvt.d.s $fa3, $fa3 @@ -317,8 +326,8 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a2, $a2, 4 addi.d $a0, $a0, -1 addi.d $a3, $a3, 4 - bnez $a0, .LBB0_23 -.LBB0_24: # %._crit_edge197 + bnez $a0, .LBB0_24 +.LBB0_25: # %._crit_edge197 ld.d $a2, $sp, 96 # 8-byte Folded Reload ld.d $a0, $sp, 232 # 8-byte Folded Reload st.d $a0, $a2, 0 @@ -326,14 +335,14 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.w $a1, $a0, %pc_lo12(reccycle) ld.d $a3, $sp, 392 # 8-byte Folded Reload st.d $a3, $a2, 8 - ld.d $a3, $sp, 160 # 8-byte Folded Reload + ld.d $a3, $sp, 152 # 8-byte Folded Reload st.d $a3, $a2, 16 st.d $a5, $a2, 24 addi.d $a1, $a1, 1 st.w $a1, $a0, %pc_lo12(reccycle) - blez $fp, .LBB0_57 -# %bb.25: - move $a0, $s5 + blez $fp, .LBB0_58 +# %bb.26: + move $a0, $s6 move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 @@ -342,25 +351,23 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - blez $s5, .LBB0_33 -# %bb.26: # %.lr.ph.preheader.i + blez $s6, .LBB0_34 +# %bb.27: # %.lr.ph.preheader.i ori $a2, $zero, 8 move $a1, $zero - bltu $s5, $a2, .LBB0_31 -# %bb.27: # %.lr.ph.preheader.i - ld.d $a2, $sp, 104 # 8-byte Folded Reload - sub.d $a2, $fp, $a2 + bltu $s6, $a2, .LBB0_32 +# %bb.28: # %.lr.ph.preheader.i + sub.d $a2, $fp, $s4 ori $a3, $zero, 64 - bltu $a2, $a3, .LBB0_31 -# %bb.28: # %vector.ph282 - bstrpick.d $a1, $s5, 30, 3 + bltu $a2, $a3, .LBB0_32 +# %bb.29: # %vector.ph282 + bstrpick.d $a1, $s6, 30, 3 slli.d $a1, $a1, 3 addi.d $a2, $fp, 32 - ld.d $a3, $sp, 104 # 8-byte Folded Reload - addi.d $a3, $a3, 32 + addi.d $a3, $s4, 32 move $a4, $a1 .p2align 4, , 16 -.LBB0_29: # %vector.body285 +.LBB0_30: # %vector.body285 # =>This Inner Loop Header: Depth=1 xvld $xr0, $a3, -32 xvld $xr1, $a3, 0 @@ -369,43 +376,42 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, -8 addi.d $a2, $a2, 64 addi.d $a3, $a3, 64 - bnez $a4, .LBB0_29 -# %bb.30: # %middle.block290 - beq $a1, $s5, .LBB0_33 -.LBB0_31: # %.lr.ph.i.preheader - sub.d $a2, $s5, $a1 + bnez $a4, .LBB0_30 +# %bb.31: # %middle.block290 + beq $a1, $s6, .LBB0_34 +.LBB0_32: # %.lr.ph.i.preheader + sub.d $a2, $s6, $a1 alsl.d $a3, $a1, $fp, 3 - ld.d $a4, $sp, 104 # 8-byte Folded Reload - alsl.d $a1, $a1, $a4, 3 + alsl.d $a1, $a1, $s4, 3 .p2align 4, , 16 -.LBB0_32: # %.lr.ph.i +.LBB0_33: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 ld.d $a4, $a1, 0 st.d $a4, $a3, 0 addi.d $a2, $a2, -1 addi.d $a3, $a3, 8 addi.d $a1, $a1, 8 - bnez $a2, .LBB0_32 -.LBB0_33: # %.preheader13.i - blez $s7, .LBB0_41 -# %bb.34: # %.lr.ph17.preheader.i + bnez $a2, .LBB0_33 +.LBB0_34: # %.preheader13.i + blez $s7, .LBB0_42 +# %bb.35: # %.lr.ph17.preheader.i ori $a2, $zero, 8 move $a1, $zero - bltu $s7, $a2, .LBB0_39 -# %bb.35: # %.lr.ph17.preheader.i - ld.d $a2, $sp, 72 # 8-byte Folded Reload + bltu $s7, $a2, .LBB0_40 +# %bb.36: # %.lr.ph17.preheader.i + ld.d $a2, $sp, 224 # 8-byte Folded Reload sub.d $a2, $a0, $a2 ori $a3, $zero, 64 - bltu $a2, $a3, .LBB0_39 -# %bb.36: # %vector.ph297 + bltu $a2, $a3, .LBB0_40 +# %bb.37: # %vector.ph297 bstrpick.d $a1, $s7, 30, 3 slli.d $a1, $a1, 3 addi.d $a2, $a0, 32 - ld.d $a3, $sp, 72 # 8-byte Folded Reload + ld.d $a3, $sp, 224 # 8-byte Folded Reload addi.d $a3, $a3, 32 move $a4, $a1 .p2align 4, , 16 -.LBB0_37: # %vector.body300 +.LBB0_38: # %vector.body300 # =>This Inner Loop Header: Depth=1 xvld $xr0, $a3, -32 xvld $xr1, $a3, 0 @@ -414,35 +420,35 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, -8 addi.d $a2, $a2, 64 addi.d $a3, $a3, 64 - bnez $a4, .LBB0_37 -# %bb.38: # %middle.block305 - beq $a1, $s7, .LBB0_41 -.LBB0_39: # %.lr.ph17.i.preheader + bnez $a4, .LBB0_38 +# %bb.39: # %middle.block305 + beq $a1, $s7, .LBB0_42 +.LBB0_40: # %.lr.ph17.i.preheader sub.d $a2, $s7, $a1 alsl.d $a3, $a1, $a0, 3 - ld.d $a4, $sp, 72 # 8-byte Folded Reload + ld.d $a4, $sp, 224 # 8-byte Folded Reload alsl.d $a1, $a1, $a4, 3 .p2align 4, , 16 -.LBB0_40: # %.lr.ph17.i +.LBB0_41: # %.lr.ph17.i # =>This Inner Loop Header: Depth=1 ld.d $a4, $a1, 0 st.d $a4, $a3, 0 addi.d $a2, $a2, -1 addi.d $a3, $a3, 8 addi.d $a1, $a1, 8 - bnez $a2, .LBB0_40 -.LBB0_41: # %._crit_edge.i + bnez $a2, .LBB0_41 +.LBB0_42: # %._crit_edge.i ori $a1, $zero, 10 - blt $s3, $a1, .LBB0_66 -# %bb.42: # %._crit_edge.i + blt $s3, $a1, .LBB0_67 +# %bb.43: # %._crit_edge.i addi.w $a2, $s1, 0 ori $a1, $zero, 9 - bgeu $a1, $a2, .LBB0_66 -# %bb.43: # %iter.check - ld.d $a0, $sp, 168 # 8-byte Folded Reload - addi.w $s2, $a0, 100 + bgeu $a1, $a2, .LBB0_67 +# %bb.44: # %iter.check + ld.d $s6, $sp, 160 # 8-byte Folded Reload move $s8, $a2 - addi.w $s3, $s1, 100 + addi.w $s3, $s6, 100 + addi.w $s7, $s1, 100 move $a0, $s0 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -466,19 +472,19 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a0, $s0 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill move $a0, $s0 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 216 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill move $a0, $s0 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill move $a0, $s0 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill move $a0, $s0 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 @@ -495,41 +501,41 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a0, $s1 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill move $a0, $s0 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 200 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill move $a0, $s0 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 st.d $a0, $sp, 448 # 8-byte Folded Spill - sltu $a0, $s3, $s2 - masknez $a1, $s3, $a0 - maskeqz $a0, $s2, $a0 + sltu $a0, $s7, $s3 + masknez $a1, $s7, $a0 + maskeqz $a0, $s3, $a0 or $a0, $a0, $a1 - addi.w $s7, $a0, 2 - move $a0, $s7 + addi.w $s2, $a0, 2 + move $a0, $s2 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 ori $a1, $zero, 26 - move $a0, $s7 + move $a0, $s2 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 move $s5, $a0 ori $a1, $zero, 26 - move $a0, $s7 + move $a0, $s2 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - move $s6, $a0 - move $a0, $s2 - move $a1, $s3 + move $s2, $a0 + move $a0, $s3 + move $a1, $s7 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 st.d $a0, $sp, 368 # 8-byte Folded Spill - move $a0, $s2 - move $a1, $s3 + move $a0, $s3 move $s3, $fp + move $a1, $s7 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 st.d $a0, $sp, 320 # 8-byte Folded Spill @@ -543,7 +549,7 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a3, $zero ld.d $a4, $sp, 440 # 8-byte Folded Reload move $a5, $s5 - move $a6, $s6 + move $a6, $s2 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 ori $a7, $zero, 1 @@ -555,12 +561,11 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a4, $s8 st.d $s5, $sp, 336 # 8-byte Folded Spill move $a5, $s5 - st.d $s6, $sp, 328 # 8-byte Folded Spill - move $a6, $s6 + st.d $s2, $sp, 328 # 8-byte Folded Spill + move $a6, $s2 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - addi.w $a1, $a0, 1 + addi.w $a1, $s6, 1 ori $a0, $zero, 2 slt $a2, $a0, $a1 masknez $a0, $a0, $a2 @@ -569,22 +574,22 @@ Lalignmm_hmout: # @Lalignmm_hmout ori $a0, $zero, 5 ld.d $s6, $sp, 232 # 8-byte Folded Reload addi.d $s5, $s6, 4 - bge $a1, $a0, .LBB0_73 -# %bb.44: - ld.d $s7, $sp, 160 # 8-byte Folded Reload + bge $a1, $a0, .LBB0_74 +# %bb.45: + ld.d $s7, $sp, 152 # 8-byte Folded Reload ld.d $a6, $sp, 392 # 8-byte Folded Reload - ld.d $a7, $sp, 224 # 8-byte Folded Reload -.LBB0_45: - ld.d $t0, $sp, 112 # 8-byte Folded Reload - ld.d $t1, $sp, 200 # 8-byte Folded Reload + ld.d $a7, $sp, 216 # 8-byte Folded Reload +.LBB0_46: + ld.d $t0, $sp, 104 # 8-byte Folded Reload + ld.d $t1, $sp, 192 # 8-byte Folded Reload ld.d $t2, $sp, 368 # 8-byte Folded Reload -.LBB0_46: # %.lr.ph20.i.preheader +.LBB0_47: # %.lr.ph20.i.preheader sub.d $a0, $t7, $fp alsl.d $a1, $fp, $s4, 2 alsl.d $a2, $fp, $a6, 2 addi.d $a2, $a2, -4 .p2align 4, , 16 -.LBB0_47: # %.lr.ph20.i +.LBB0_48: # %.lr.ph20.i # =>This Inner Loop Header: Depth=1 fld.s $fa0, $s6, 0 fld.s $fa1, $a2, 0 @@ -595,9 +600,9 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a0, $a0, -1 addi.d $a1, $a1, 4 addi.d $a2, $a2, 4 - bnez $a0, .LBB0_47 -.LBB0_48: # %iter.check362 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + bnez $a0, .LBB0_48 +.LBB0_49: # %iter.check362 + ld.d $a0, $sp, 160 # 8-byte Folded Reload addi.w $t4, $a0, -1 addi.w $t3, $t0, -1 addi.w $t6, $t0, 1 @@ -612,14 +617,14 @@ Lalignmm_hmout: # @Lalignmm_hmout ori $a3, $zero, 5 alsl.d $a1, $a0, $s3, 2 addi.d $a4, $s7, 4 - bge $t6, $a3, .LBB0_77 -.LBB0_49: # %.lr.ph22.i.preheader + bge $t6, $a3, .LBB0_78 +.LBB0_50: # %.lr.ph22.i.preheader sub.d $a3, $a0, $a2 alsl.d $a4, $a2, $s3, 2 alsl.d $a2, $a2, $a7, 2 addi.d $a2, $a2, -4 .p2align 4, , 16 -.LBB0_50: # %.lr.ph22.i +.LBB0_51: # %.lr.ph22.i # =>This Inner Loop Header: Depth=1 fld.s $fa0, $s7, 0 fld.s $fa1, $a2, 0 @@ -630,8 +635,8 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a3, $a3, -1 addi.d $a4, $a4, 4 addi.d $a2, $a2, 4 - bnez $a3, .LBB0_50 -.LBB0_51: # %.lr.ph27.preheader.i + bnez $a3, .LBB0_51 +.LBB0_52: # %.lr.ph27.preheader.i fld.s $fa0, $s4, 0 ld.d $a2, $t2, 0 ld.d $t5, $sp, 584 @@ -640,36 +645,36 @@ Lalignmm_hmout: # @Lalignmm_hmout ori $a4, $zero, 4 ori $a5, $zero, 8 .p2align 4, , 16 -.LBB0_52: # %.lr.ph27.i +.LBB0_53: # %.lr.ph27.i # =>This Inner Loop Header: Depth=1 fldx.s $fa0, $s4, $a4 ldx.d $a6, $t2, $a5 fst.s $fa0, $a6, 0 addi.d $a4, $a4, 4 addi.d $a5, $a5, 8 - bne $a3, $a4, .LBB0_52 -# %bb.53: # %iter.check403 + bne $a3, $a4, .LBB0_53 +# %bb.54: # %iter.check403 ori $a4, $zero, 5 ori $a3, $zero, 1 - blt $t6, $a4, .LBB0_88 -# %bb.54: # %iter.check403 + blt $t6, $a4, .LBB0_89 +# %bb.55: # %iter.check403 sub.d $a4, $a2, $s3 ori $a5, $zero, 64 - bltu $a4, $a5, .LBB0_88 -# %bb.55: # %vector.main.loop.iter.check405 + bltu $a4, $a5, .LBB0_89 +# %bb.56: # %vector.main.loop.iter.check405 ori $a3, $zero, 17 - bge $t6, $a3, .LBB0_81 -# %bb.56: + bge $t6, $a3, .LBB0_82 +# %bb.57: move $a4, $zero - b .LBB0_85 -.LBB0_57: # %.preheader1.i - blez $s5, .LBB0_60 -# %bb.58: # %.lr.ph114.i - ld.d $fp, $sp, 104 # 8-byte Folded Reload - move $s0, $s4 - move $s1, $s5 + b .LBB0_86 +.LBB0_58: # %.preheader1.i + blez $s6, .LBB0_61 +# %bb.59: # %.lr.ph114.i + move $fp, $s4 + move $s0, $s5 + move $s1, $s6 .p2align 4, , 16 -.LBB0_59: # =>This Inner Loop Header: Depth=1 +.LBB0_60: # =>This Inner Loop Header: Depth=1 ld.d $a0, $fp, 0 ld.d $a1, $s0, 0 move $a2, $s3 @@ -680,33 +685,34 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $s1, $s1, -1 addi.d $s0, $s0, 8 addi.d $fp, $fp, 8 - bnez $s1, .LBB0_59 -.LBB0_60: # %.preheader.i - ld.d $s6, $sp, 72 # 8-byte Folded Reload - blez $s7, .LBB0_176 -# %bb.61: # %.lr.ph121.i + bnez $s1, .LBB0_60 +.LBB0_61: # %.preheader.i + blez $s7, .LBB0_177 +# %bb.62: # %.lr.ph121.i move $s0, $zero ori $s1, $zero, 45 - b .LBB0_63 + b .LBB0_64 .p2align 4, , 16 -.LBB0_62: # %._crit_edge119.i - # in Loop: Header=BB0_63 Depth=1 +.LBB0_63: # %._crit_edge119.i + # in Loop: Header=BB0_64 Depth=1 addi.d $s0, $s0, 1 ld.d $s3, $sp, 440 # 8-byte Folded Reload - beq $s0, $s7, .LBB0_176 -.LBB0_63: # =>This Loop Header: Depth=1 - # Child Loop BB0_65 Depth 2 + beq $s0, $s7, .LBB0_177 +.LBB0_64: # =>This Loop Header: Depth=1 + # Child Loop BB0_66 Depth 2 slli.d $a0, $s0, 3 - ldx.d $a0, $s6, $a0 + ld.d $a1, $sp, 224 # 8-byte Folded Reload + ldx.d $a0, $a1, $a0 st.b $zero, $a0, 0 - blez $s3, .LBB0_62 -# %bb.64: # %.lr.ph118.i.preheader - # in Loop: Header=BB0_63 Depth=1 - alsl.d $s2, $s0, $s6, 3 - ld.d $s3, $sp, 168 # 8-byte Folded Reload + blez $s3, .LBB0_63 +# %bb.65: # %.lr.ph118.i.preheader + # in Loop: Header=BB0_64 Depth=1 + ld.d $a0, $sp, 224 # 8-byte Folded Reload + alsl.d $s2, $s0, $a0, 3 + ld.d $s3, $sp, 160 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_65: # %.lr.ph118.i - # Parent Loop BB0_63 Depth=1 +.LBB0_66: # %.lr.ph118.i + # Parent Loop BB0_64 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $fp, $s2, 0 move $a0, $fp @@ -714,9 +720,9 @@ Lalignmm_hmout: # @Lalignmm_hmout jirl $ra, $ra, 0 addi.w $s3, $s3, -1 stx.h $s1, $fp, $a0 - bnez $s3, .LBB0_65 - b .LBB0_62 -.LBB0_66: + bnez $s3, .LBB0_66 + b .LBB0_63 +.LBB0_67: move $s0, $a0 move $a0, $fp pcaddu18i $ra, %call36(free) @@ -724,307 +730,229 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a0, $s0 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s6, $sp, 72 # 8-byte Folded Reload - b .LBB0_176 -.LBB0_67: # %vector.ph - ld.d $a1, $sp, 168 # 8-byte Folded Reload + b .LBB0_177 +.LBB0_68: # %vector.ph + ld.d $a1, $sp, 160 # 8-byte Folded Reload bstrpick.d $a1, $a1, 30, 3 slli.d $a1, $a1, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a2, %pc_hi20(.LCPI0_0) + xvld $xr1, $a2, %pc_lo12(.LCPI0_0) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 + move $a2, $a3 + move $a3, $a4 move $a4, $a1 .p2align 4, , 16 -.LBB0_68: # %vector.body +.LBB0_69: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a2, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a2, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a3, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a2, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a3, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a2, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a3, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a3, 0 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 addi.d $a2, $a2, 32 - bnez $a4, .LBB0_68 -# %bb.69: # %middle.block - beq $a0, $a1, .LBB0_17 - b .LBB0_15 -.LBB0_70: # %vector.ph266 + bnez $a4, .LBB0_69 +# %bb.70: # %middle.block + beq $a0, $a1, .LBB0_18 + b .LBB0_16 +.LBB0_71: # %vector.ph266 bstrpick.d $a1, $s1, 30, 3 slli.d $a1, $a1, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a2, %pc_hi20(.LCPI0_0) + xvld $xr1, $a2, %pc_lo12(.LCPI0_0) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 + move $a2, $a3 move $a3, $a5 move $a4, $a1 .p2align 4, , 16 -.LBB0_71: # %vector.body271 +.LBB0_72: # %vector.body271 # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a2, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a2, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a3, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a2, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a3, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a2, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a3, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a3, 0 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 addi.d $a2, $a2, 32 - bnez $a4, .LBB0_71 -# %bb.72: # %middle.block276 - beq $a0, $a1, .LBB0_24 - b .LBB0_22 -.LBB0_73: # %vector.memcheck308 + bnez $a4, .LBB0_72 +# %bb.73: # %middle.block276 + beq $a0, $a1, .LBB0_25 + b .LBB0_23 +.LBB0_74: # %vector.memcheck308 addi.d $a0, $s4, 4 alsl.d $a2, $t7, $s4, 2 sltu $a3, $a0, $s5 sltu $a4, $s6, $a2 and $a3, $a3, $a4 - ld.d $s7, $sp, 160 # 8-byte Folded Reload + ld.d $s7, $sp, 152 # 8-byte Folded Reload ld.d $a6, $sp, 392 # 8-byte Folded Reload - ld.d $a7, $sp, 224 # 8-byte Folded Reload - bnez $a3, .LBB0_45 -# %bb.74: # %vector.memcheck308 + ld.d $a7, $sp, 216 # 8-byte Folded Reload + bnez $a3, .LBB0_46 +# %bb.75: # %vector.memcheck308 alsl.d $a3, $t7, $a6, 2 addi.d $a3, $a3, -4 sltu $a0, $a0, $a3 sltu $a2, $a6, $a2 and $a0, $a0, $a2 - ld.d $t0, $sp, 112 # 8-byte Folded Reload - ld.d $t1, $sp, 200 # 8-byte Folded Reload + ld.d $t0, $sp, 104 # 8-byte Folded Reload + ld.d $t1, $sp, 192 # 8-byte Folded Reload ld.d $t2, $sp, 368 # 8-byte Folded Reload - bnez $a0, .LBB0_46 -# %bb.75: # %vector.main.loop.iter.check + bnez $a0, .LBB0_47 +# %bb.76: # %vector.main.loop.iter.check ori $a2, $zero, 17 addi.d $a0, $t7, -1 - bge $a1, $a2, .LBB0_208 -# %bb.76: + bge $a1, $a2, .LBB0_209 +# %bb.77: move $a1, $zero - b .LBB0_212 -.LBB0_77: # %vector.memcheck347 + b .LBB0_213 +.LBB0_78: # %vector.memcheck347 addi.d $a3, $s3, 4 sltu $a4, $a3, $a4 sltu $a5, $s7, $a1 and $a4, $a4, $a5 - bnez $a4, .LBB0_49 -# %bb.78: # %vector.memcheck347 + bnez $a4, .LBB0_50 +# %bb.79: # %vector.memcheck347 alsl.d $a4, $a0, $a7, 2 addi.d $a4, $a4, -4 sltu $a3, $a3, $a4 sltu $a4, $a7, $a1 and $a3, $a3, $a4 - bnez $a3, .LBB0_49 -# %bb.79: # %vector.main.loop.iter.check364 + bnez $a3, .LBB0_50 +# %bb.80: # %vector.main.loop.iter.check364 ori $a2, $zero, 17 - bge $t6, $a2, .LBB0_215 -# %bb.80: + bge $t6, $a2, .LBB0_216 +# %bb.81: move $a3, $zero - b .LBB0_219 -.LBB0_81: # %vector.ph406 + b .LBB0_220 +.LBB0_82: # %vector.ph406 addi.d $a4, $a0, -1 andi $a3, $a4, 12 bstrins.d $a4, $zero, 3, 0 @@ -1032,7 +960,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a6, $a2, 36 move $a7, $a4 .p2align 4, , 16 -.LBB0_82: # %vector.body409 +.LBB0_83: # %vector.body409 # =>This Inner Loop Header: Depth=1 xvld $xr0, $a5, -32 xvld $xr1, $a5, 0 @@ -1041,13 +969,13 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a5, $a5, 64 addi.d $a7, $a7, -16 addi.d $a6, $a6, 64 - bnez $a7, .LBB0_82 -# %bb.83: # %middle.block414 + bnez $a7, .LBB0_83 +# %bb.84: # %middle.block414 addi.d $a5, $a0, -1 - beq $a5, $a4, .LBB0_90 -# %bb.84: # %vec.epilog.iter.check418 - beqz $a3, .LBB0_250 -.LBB0_85: # %vec.epilog.ph420 + beq $a5, $a4, .LBB0_91 +# %bb.85: # %vec.epilog.iter.check418 + beqz $a3, .LBB0_251 +.LBB0_86: # %vec.epilog.ph420 addi.d $a3, $a0, -1 move $a5, $a3 bstrins.d $a5, $zero, 1, 0 @@ -1059,36 +987,36 @@ Lalignmm_hmout: # @Lalignmm_hmout add.d $a7, $a2, $a7 sub.d $a4, $a4, $a5 .p2align 4, , 16 -.LBB0_86: # %vec.epilog.vector.body423 +.LBB0_87: # %vec.epilog.vector.body423 # =>This Inner Loop Header: Depth=1 vld $vr0, $a6, 0 vst $vr0, $a7, 0 addi.d $a6, $a6, 16 addi.d $a4, $a4, 4 addi.d $a7, $a7, 16 - bnez $a4, .LBB0_86 -# %bb.87: # %vec.epilog.middle.block428 + bnez $a4, .LBB0_87 +# %bb.88: # %vec.epilog.middle.block428 addi.d $a4, $a0, -1 - beq $a4, $a5, .LBB0_90 -.LBB0_88: # %.lr.ph29.i.preheader + beq $a4, $a5, .LBB0_91 +.LBB0_89: # %.lr.ph29.i.preheader sub.d $a4, $a0, $a3 alsl.d $a2, $a3, $a2, 2 alsl.d $a3, $a3, $s3, 2 .p2align 4, , 16 -.LBB0_89: # %.lr.ph29.i +.LBB0_90: # %.lr.ph29.i # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a3, 0 fst.s $fa0, $a2, 0 addi.d $a4, $a4, -1 addi.d $a2, $a2, 4 addi.d $a3, $a3, 4 - bnez $a4, .LBB0_89 -.LBB0_90: # %iter.check447 + bnez $a4, .LBB0_90 +.LBB0_91: # %iter.check447 ori $a2, $zero, 1 ori $a3, $zero, 5 addi.d $t2, $t1, 4 - bge $t6, $a3, .LBB0_185 -.LBB0_91: # %vec.epilog.scalar.ph463.preheader + bge $t6, $a3, .LBB0_186 +.LBB0_92: # %vec.epilog.scalar.ph463.preheader sub.d $a1, $a0, $a2 ld.d $a3, $sp, 448 # 8-byte Folded Reload alsl.d $a3, $a2, $a3, 2 @@ -1096,7 +1024,7 @@ Lalignmm_hmout: # @Lalignmm_hmout alsl.d $a2, $a2, $s3, 2 addi.d $a2, $a2, -4 .p2align 4, , 16 -.LBB0_92: # %vec.epilog.scalar.ph463 +.LBB0_93: # %vec.epilog.scalar.ph463 # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a2, 0 fld.s $fa1, $s5, 0 @@ -1107,8 +1035,8 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a3, $a3, 4 addi.d $a4, $a4, 4 addi.d $a2, $a2, 4 - bnez $a1, .LBB0_92 -.LBB0_93: # %.lr.ph50.i + bnez $a1, .LBB0_93 +.LBB0_94: # %.lr.ph50.i st.d $t2, $sp, 256 # 8-byte Folded Spill st.d $t6, $sp, 240 # 8-byte Folded Spill st.d $t5, $sp, 16 # 8-byte Folded Spill @@ -1117,9 +1045,9 @@ Lalignmm_hmout: # @Lalignmm_hmout slli.d $a1, $t3, 2 st.d $a1, $sp, 408 # 8-byte Folded Spill fldx.s $fa0, $s3, $a1 - ld.d $a1, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 160 # 8-byte Folded Reload bstrpick.d $a1, $a1, 31, 0 - ld.d $a2, $sp, 192 # 8-byte Folded Reload + ld.d $a2, $sp, 184 # 8-byte Folded Reload fst.s $fa0, $a2, 0 movgr2fr.d $fa0, $a1 ffint.d.l $fa0, $fa0 @@ -1130,7 +1058,7 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $a1, $sp, 448 # 8-byte Folded Reload addi.d $a1, $a1, 4 st.d $a1, $sp, 248 # 8-byte Folded Spill - st.d $t3, $sp, 184 # 8-byte Folded Spill + st.d $t3, $sp, 176 # 8-byte Folded Spill alsl.d $a1, $t3, $t1, 2 st.d $a1, $sp, 432 # 8-byte Folded Spill bstrpick.d $a1, $t0, 30, 0 @@ -1143,12 +1071,12 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a0, $s3 ld.d $a1, $sp, 48 # 8-byte Folded Reload st.d $a1, $sp, 456 # 8-byte Folded Spill - b .LBB0_95 + b .LBB0_96 .p2align 4, , 16 -.LBB0_94: # %._crit_edge45.i - # in Loop: Header=BB0_95 Depth=1 +.LBB0_95: # %._crit_edge45.i + # in Loop: Header=BB0_96 Depth=1 fldx.s $fa0, $s8, $fp - ld.d $a2, $sp, 192 # 8-byte Folded Reload + ld.d $a2, $sp, 184 # 8-byte Folded Reload fstx.s $fa0, $a2, $a0 ld.d $a0, $sp, 432 # 8-byte Folded Reload fld.s $fa0, $a0, 0 @@ -1158,10 +1086,10 @@ Lalignmm_hmout: # @Lalignmm_hmout fstx.s $fa0, $a1, $a0 move $a0, $s8 move $t7, $s3 - beq $s2, $s3, .LBB0_107 -.LBB0_95: # =>This Loop Header: Depth=1 - # Child Loop BB0_105 Depth 2 - # Child Loop BB0_101 Depth 2 + beq $s2, $s3, .LBB0_108 +.LBB0_96: # =>This Loop Header: Depth=1 + # Child Loop BB0_106 Depth 2 + # Child Loop BB0_102 Depth 2 move $s3, $t7 addi.d $s4, $s2, -1 slli.d $a1, $s4, 2 @@ -1185,14 +1113,14 @@ Lalignmm_hmout: # @Lalignmm_hmout fldx.s $fa0, $fp, $a0 fst.s $fa0, $s8, 0 fldx.s $fa0, $s6, $a0 - ld.d $ra, $sp, 200 # 8-byte Folded Reload + ld.d $ra, $sp, 192 # 8-byte Folded Reload fst.s $fa0, $ra, 0 ld.d $t8, $sp, 344 # 8-byte Folded Reload - bne $s2, $s0, .LBB0_97 -# %bb.96: # in Loop: Header=BB0_95 Depth=1 + bne $s2, $s0, .LBB0_98 +# %bb.97: # in Loop: Header=BB0_96 Depth=1 fst.s $fa0, $t8, 0 -.LBB0_97: # %.lr.ph44.i - # in Loop: Header=BB0_95 Depth=1 +.LBB0_98: # %.lr.ph44.i + # in Loop: Header=BB0_96 Depth=1 ld.d $fp, $sp, 456 # 8-byte Folded Reload fld.s $fa0, $fp, 0 fld.s $fa1, $s7, 4 @@ -1205,27 +1133,27 @@ Lalignmm_hmout: # @Lalignmm_hmout fadd.s $fa0, $fa0, $fa1 ld.d $a3, $sp, 392 # 8-byte Folded Reload alsl.d $a3, $s4, $a3, 2 - ld.d $t3, $sp, 224 # 8-byte Folded Reload + ld.d $t3, $sp, 216 # 8-byte Folded Reload ld.d $t4, $sp, 360 # 8-byte Folded Reload ld.d $t5, $sp, 352 # 8-byte Folded Reload - ld.d $t6, $sp, 208 # 8-byte Folded Reload - ld.d $t7, $sp, 216 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload + ld.d $t7, $sp, 208 # 8-byte Folded Reload move $s8, $a5 - bne $s2, $s0, .LBB0_103 -# %bb.98: # %.lr.ph44.split.us.i.preheader - # in Loop: Header=BB0_95 Depth=1 + bne $s2, $s0, .LBB0_104 +# %bb.99: # %.lr.ph44.split.us.i.preheader + # in Loop: Header=BB0_96 Depth=1 move $a5, $zero move $a7, $zero ori $a6, $zero, 4 ld.d $fp, $sp, 408 # 8-byte Folded Reload - b .LBB0_101 + b .LBB0_102 .p2align 4, , 16 -.LBB0_99: # in Loop: Header=BB0_101 Depth=2 +.LBB0_100: # in Loop: Header=BB0_102 Depth=2 fstx.s $fa6, $ra, $a6 ld.d $t0, $sp, 448 # 8-byte Folded Reload stx.w $s4, $t0, $a6 move $t0, $s4 -.LBB0_100: # in Loop: Header=BB0_101 Depth=2 +.LBB0_101: # in Loop: Header=BB0_102 Depth=2 fadd.s $fa5, $fa0, $fa5 fcmp.clt.s $fcc0, $fa1, $fa5 fadd.s $fa3, $fa3, $fa4 @@ -1254,9 +1182,9 @@ Lalignmm_hmout: # @Lalignmm_hmout fstx.s $fa0, $t4, $a6 addi.d $a6, $a6, 4 addi.d $a5, $a5, 1 - beq $s1, $a6, .LBB0_94 -.LBB0_101: # %.lr.ph44.split.us.i - # Parent Loop BB0_95 Depth=1 + beq $s1, $a6, .LBB0_95 +.LBB0_102: # %.lr.ph44.split.us.i + # Parent Loop BB0_96 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $t0, $sp, 456 # 8-byte Folded Reload add.d $t0, $t0, $a6 @@ -1269,15 +1197,15 @@ Lalignmm_hmout: # @Lalignmm_hmout fld.s $fa4, $a3, 0 fadd.s $fa6, $fa1, $fa6 fcmp.cle.s $fcc0, $fa3, $fa6 - bcnez $fcc0, .LBB0_99 -# %bb.102: # %.lr.ph44.split.us._crit_edge.i - # in Loop: Header=BB0_101 Depth=2 + bcnez $fcc0, .LBB0_100 +# %bb.103: # %.lr.ph44.split.us._crit_edge.i + # in Loop: Header=BB0_102 Depth=2 ld.d $t0, $sp, 448 # 8-byte Folded Reload ldx.w $t0, $t0, $a6 - b .LBB0_100 + b .LBB0_101 .p2align 4, , 16 -.LBB0_103: # %.lr.ph44.split.i.preheader - # in Loop: Header=BB0_95 Depth=1 +.LBB0_104: # %.lr.ph44.split.i.preheader + # in Loop: Header=BB0_96 Depth=1 addi.d $a5, $a5, 4 addi.d $a6, $a1, 4 addi.d $a4, $a4, 4 @@ -1288,9 +1216,9 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $t3, $sp, 248 # 8-byte Folded Reload move $t4, $fp ld.d $fp, $sp, 408 # 8-byte Folded Reload - b .LBB0_105 + b .LBB0_106 .p2align 4, , 16 -.LBB0_104: # in Loop: Header=BB0_105 Depth=2 +.LBB0_105: # in Loop: Header=BB0_106 Depth=2 fadd.s $fa4, $fa0, $fa4 fcmp.clt.s $fcc0, $fa1, $fa4 fadd.s $fa3, $fa3, $fa5 @@ -1315,9 +1243,9 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, 4 addi.d $t0, $t0, 4 addi.d $a7, $a7, 4 - beqz $t1, .LBB0_94 -.LBB0_105: # %.lr.ph44.split.i - # Parent Loop BB0_95 Depth=1 + beqz $t1, .LBB0_95 +.LBB0_106: # %.lr.ph44.split.i + # Parent Loop BB0_96 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa1, $t4, 0 fld.s $fa4, $a7, 0 @@ -1327,12 +1255,12 @@ Lalignmm_hmout: # @Lalignmm_hmout fld.s $fa5, $a3, 0 fadd.s $fa6, $fa1, $fa6 fcmp.cult.s $fcc0, $fa6, $fa3 - bcnez $fcc0, .LBB0_104 -# %bb.106: # in Loop: Header=BB0_105 Depth=2 + bcnez $fcc0, .LBB0_105 +# %bb.107: # in Loop: Header=BB0_106 Depth=2 fst.s $fa6, $t2, 0 st.w $s4, $t3, 0 - b .LBB0_104 -.LBB0_107: # %iter.check494 + b .LBB0_105 +.LBB0_108: # %iter.check494 ori $a7, $zero, 1 ld.d $s3, $sp, 416 # 8-byte Folded Reload move $a0, $s3 @@ -1340,7 +1268,7 @@ Lalignmm_hmout: # @Lalignmm_hmout move $a1, $s0 ld.d $fp, $sp, 384 # 8-byte Folded Reload move $a2, $fp - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 176 # 8-byte Folded Reload ld.d $a4, $sp, 440 # 8-byte Folded Reload ld.d $s1, $sp, 336 # 8-byte Folded Reload move $a5, $s1 @@ -1365,27 +1293,27 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $a4, $sp, 392 # 8-byte Folded Reload alsl.d $a2, $a5, $a4, 2 move $ra, $a5 - bgeu $fp, $a0, .LBB0_189 -# %bb.108: + bgeu $fp, $a0, .LBB0_190 +# %bb.109: move $a0, $zero - ld.d $t1, $sp, 224 # 8-byte Folded Reload - ld.d $t2, $sp, 112 # 8-byte Folded Reload + ld.d $t1, $sp, 216 # 8-byte Folded Reload + ld.d $t2, $sp, 104 # 8-byte Folded Reload ld.d $t3, $sp, 360 # 8-byte Folded Reload ld.d $t4, $sp, 352 # 8-byte Folded Reload ld.d $s2, $sp, 344 # 8-byte Folded Reload - ld.d $t6, $sp, 200 # 8-byte Folded Reload + ld.d $t6, $sp, 192 # 8-byte Folded Reload ld.d $t0, $sp, 368 # 8-byte Folded Reload -.LBB0_109: # %vec.epilog.scalar.ph512.preheader - ld.d $t7, $sp, 184 # 8-byte Folded Reload +.LBB0_110: # %vec.epilog.scalar.ph512.preheader + ld.d $t7, $sp, 176 # 8-byte Folded Reload ld.d $fp, $sp, 408 # 8-byte Folded Reload move $a7, $s8 -.LBB0_110: # %vec.epilog.scalar.ph512.preheader +.LBB0_111: # %vec.epilog.scalar.ph512.preheader alsl.d $a1, $a0, $s3, 2 sub.d $a3, $a5, $a0 alsl.d $a0, $a0, $s6, 2 addi.d $a0, $a0, 4 .p2align 4, , 16 -.LBB0_111: # %vec.epilog.scalar.ph512 +.LBB0_112: # %vec.epilog.scalar.ph512 # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a2, 0 fld.s $fa1, $a0, 0 @@ -1396,22 +1324,22 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a1, $a1, 4 addi.d $a3, $a3, -1 addi.d $a0, $a0, 4 - bnez $a3, .LBB0_111 -.LBB0_112: # %iter.check543 + bnez $a3, .LBB0_112 +.LBB0_113: # %iter.check543 alsl.d $a0, $t7, $t1, 2 ori $a3, $zero, 5 alsl.d $a1, $t7, $a7, 2 ld.d $a4, $sp, 424 # 8-byte Folded Reload - bgeu $a4, $a3, .LBB0_193 -# %bb.113: + bgeu $a4, $a3, .LBB0_194 +# %bb.114: move $a3, $zero -.LBB0_114: # %vec.epilog.scalar.ph561.preheader +.LBB0_115: # %vec.epilog.scalar.ph561.preheader alsl.d $a4, $a3, $a7, 2 sub.d $a5, $t7, $a3 alsl.d $a3, $a3, $s7, 2 addi.d $a3, $a3, 4 .p2align 4, , 16 -.LBB0_115: # %vec.epilog.scalar.ph561 +.LBB0_116: # %vec.epilog.scalar.ph561 # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a0, 0 fld.s $fa1, $a3, 0 @@ -1422,12 +1350,12 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, 4 addi.d $a5, $a5, -1 addi.d $a3, $a3, 4 - bnez $a5, .LBB0_115 -.LBB0_116: # %.lr.ph60.i.preheader + bnez $a5, .LBB0_116 +.LBB0_117: # %.lr.ph60.i.preheader move $a3, $ra move $a4, $t0 .p2align 4, , 16 -.LBB0_117: # %.lr.ph60.i +.LBB0_118: # %.lr.ph60.i # =>This Inner Loop Header: Depth=1 ld.d $a5, $a4, 0 fld.s $fa0, $a2, 0 @@ -1439,22 +1367,22 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, 8 addi.d $a3, $a3, -1 addi.d $s5, $s5, 4 - bnez $a3, .LBB0_117 -# %bb.118: # %iter.check592 + bnez $a3, .LBB0_118 +# %bb.119: # %iter.check592 slli.d $a2, $ra, 3 ldx.d $a2, $t0, $a2 ori $a3, $zero, 5 ld.d $a4, $sp, 424 # 8-byte Folded Reload - bgeu $a4, $a3, .LBB0_197 -# %bb.119: + bgeu $a4, $a3, .LBB0_198 +# %bb.120: move $a3, $zero -.LBB0_120: # %vec.epilog.scalar.ph610.preheader +.LBB0_121: # %vec.epilog.scalar.ph610.preheader alsl.d $a2, $a3, $a2, 2 sub.d $a4, $t7, $a3 alsl.d $a3, $a3, $s7, 2 addi.d $a3, $a3, 4 .p2align 4, , 16 -.LBB0_121: # %vec.epilog.scalar.ph610 +.LBB0_122: # %vec.epilog.scalar.ph610 # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a0, 0 fld.s $fa1, $a3, 0 @@ -1465,8 +1393,8 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a2, $a2, 4 addi.d $a4, $a4, -1 addi.d $a3, $a3, 4 - bnez $a4, .LBB0_121 -.LBB0_122: # %.lr.ph64.i + bnez $a4, .LBB0_122 +.LBB0_123: # %.lr.ph64.i slti $a3, $t7, 1 ori $a2, $zero, 1 masknez $a4, $a2, $a3 @@ -1476,11 +1404,11 @@ Lalignmm_hmout: # @Lalignmm_hmout add.w $a4, $a3, $t2 ori $a6, $zero, 23 addi.d $a3, $a0, -4 - bgeu $a4, $a6, .LBB0_201 -# %bb.123: + bgeu $a4, $a6, .LBB0_202 +# %bb.124: move $a0, $t7 ld.d $t8, $sp, 304 # 8-byte Folded Reload -.LBB0_124: # %scalar.ph639.preheader +.LBB0_125: # %scalar.ph639.preheader addi.d $a1, $a0, 1 ld.d $a4, $sp, 448 # 8-byte Folded Reload alsl.d $a4, $a0, $a4, 2 @@ -1488,7 +1416,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a5, $a5, -4 alsl.d $a0, $a0, $a7, 2 .p2align 4, , 16 -.LBB0_125: # %scalar.ph639 +.LBB0_126: # %scalar.ph639 # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a0, 0 fld.s $fa1, $a3, 0 @@ -1499,8 +1427,8 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, -4 addi.d $a5, $a5, -4 addi.d $a0, $a0, -4 - blt $a2, $a1, .LBB0_125 -.LBB0_126: # %.lr.ph104.i + blt $a2, $a1, .LBB0_126 +.LBB0_127: # %.lr.ph104.i move $s8, $zero move $a5, $zero move $a6, $zero @@ -1521,48 +1449,48 @@ Lalignmm_hmout: # @Lalignmm_hmout masknez $a2, $a2, $a1 maskeqz $a1, $a3, $a1 or $a1, $a1, $a2 - st.d $a1, $sp, 152 # 8-byte Folded Spill - ld.d $a1, $sp, 56 # 8-byte Folded Reload + st.d $a1, $sp, 144 # 8-byte Folded Spill + ld.d $a1, $sp, 64 # 8-byte Folded Reload alsl.d $a1, $a0, $a1, 2 st.d $a1, $sp, 440 # 8-byte Folded Spill - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload alsl.d $a1, $a0, $a1, 2 st.d $a1, $sp, 432 # 8-byte Folded Spill alsl.d $s5, $a0, $t4, 2 alsl.d $s7, $a0, $t3, 2 - st.d $a4, $sp, 176 # 8-byte Folded Spill + st.d $a4, $sp, 168 # 8-byte Folded Spill addi.d $a0, $a4, 4 add.d $s6, $s2, $a0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload add.d $fp, $a1, $a0 addi.d $a0, $t7, -2 - st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill lu12i.w $a0, -216695 ori $a0, $a0, 1664 movgr2fr.w $fs0, $a0 addi.d $a0, $t4, 8 - st.d $a0, $sp, 136 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill movgr2fr.w $fa4, $zero move $a4, $ra - b .LBB0_129 + b .LBB0_130 .p2align 4, , 16 -.LBB0_127: # in Loop: Header=BB0_129 Depth=1 +.LBB0_128: # in Loop: Header=BB0_130 Depth=1 move $a0, $t7 move $a1, $a5 move $a2, $s8 -.LBB0_128: # in Loop: Header=BB0_129 Depth=1 +.LBB0_129: # in Loop: Header=BB0_130 Depth=1 st.d $s3, $sp, 296 # 8-byte Folded Spill move $s8, $a2 move $a5, $a1 move $a6, $a0 ld.d $a7, $sp, 400 # 8-byte Folded Reload ori $a0, $zero, 1 - bge $a0, $s2, .LBB0_160 -.LBB0_129: # %.lr.ph74.i + bge $a0, $s2, .LBB0_161 +.LBB0_130: # %.lr.ph74.i # =>This Loop Header: Depth=1 - # Child Loop BB0_131 Depth 2 - # Child Loop BB0_145 Depth 2 - # Child Loop BB0_147 Depth 2 + # Child Loop BB0_132 Depth 2 + # Child Loop BB0_146 Depth 2 + # Child Loop BB0_148 Depth 2 st.d $a6, $sp, 240 # 8-byte Folded Spill st.d $a5, $sp, 248 # 8-byte Folded Spill xvst $xr4, $sp, 256 # 32-byte Folded Spill @@ -1622,15 +1550,15 @@ Lalignmm_hmout: # @Lalignmm_hmout ldx.d $t0, $t2, $t0 sltui $a7, $a7, 1 or $a6, $a6, $a7 - ld.d $t2, $sp, 176 # 8-byte Folded Reload + ld.d $t2, $sp, 168 # 8-byte Folded Reload add.d $a7, $t1, $t2 add.d $t0, $t0, $t2 - ld.d $t8, $sp, 184 # 8-byte Folded Reload + ld.d $t8, $sp, 176 # 8-byte Folded Reload move $t1, $t8 move $t2, $t8 - b .LBB0_131 + b .LBB0_132 .p2align 4, , 16 -.LBB0_130: # in Loop: Header=BB0_131 Depth=2 +.LBB0_131: # in Loop: Header=BB0_132 Depth=2 fldx.s $fa2, $t0, $a0 fadd.s $fa2, $fa1, $fa2 fstx.s $fa2, $t0, $a0 @@ -1647,8 +1575,8 @@ Lalignmm_hmout: # @Lalignmm_hmout fstx.s $fa1, $a3, $a0 addi.w $t1, $t1, -1 addi.d $a0, $a0, -4 - blez $t1, .LBB0_141 -.LBB0_131: # Parent Loop BB0_129 Depth=1 + blez $t1, .LBB0_142 +.LBB0_132: # Parent Loop BB0_130 Depth=1 # => This Inner Loop Header: Depth=2 fldx.s $fa1, $fp, $a0 fldx.s $fa2, $a2, $a0 @@ -1660,18 +1588,18 @@ Lalignmm_hmout: # @Lalignmm_hmout fldx.s $fa3, $s0, $a0 fadd.s $fa5, $fa4, $fa5 fcmp.cule.s $fcc1, $fa5, $fa1 - bcnez $fcc1, .LBB0_133 -# %bb.132: # in Loop: Header=BB0_131 Depth=2 + bcnez $fcc1, .LBB0_134 +# %bb.133: # in Loop: Header=BB0_132 Depth=2 ldx.w $t3, $s4, $a0 fmov.s $fa1, $fa5 move $t4, $t1 fld.s $fa5, $a4, 0 fadd.s $fa5, $fa2, $fa5 fcmp.cult.s $fcc0, $fa5, $fa4 - bceqz $fcc0, .LBB0_134 - b .LBB0_135 + bceqz $fcc0, .LBB0_135 + b .LBB0_136 .p2align 4, , 16 -.LBB0_133: # in Loop: Header=BB0_131 Depth=2 +.LBB0_134: # in Loop: Header=BB0_132 Depth=2 movcf2gr $t3, $fcc0 masknez $t4, $t1, $t3 maskeqz $t3, $t2, $t3 @@ -1680,28 +1608,28 @@ Lalignmm_hmout: # @Lalignmm_hmout fld.s $fa5, $a4, 0 fadd.s $fa5, $fa2, $fa5 fcmp.cult.s $fcc0, $fa5, $fa4 - bcnez $fcc0, .LBB0_135 -.LBB0_134: # in Loop: Header=BB0_131 Depth=2 + bcnez $fcc0, .LBB0_136 +.LBB0_135: # in Loop: Header=BB0_132 Depth=2 fstx.s $fa5, $s1, $a0 stx.w $s2, $s4, $a0 -.LBB0_135: # in Loop: Header=BB0_131 Depth=2 - beqz $a6, .LBB0_138 -# %bb.136: # in Loop: Header=BB0_131 Depth=2 +.LBB0_136: # in Loop: Header=BB0_132 Depth=2 + beqz $a6, .LBB0_139 +# %bb.137: # in Loop: Header=BB0_132 Depth=2 ld.d $t6, $sp, 432 # 8-byte Folded Reload stx.w $t3, $t6, $a0 ld.d $t3, $sp, 440 # 8-byte Folded Reload stx.w $t4, $t3, $a0 - beq $t5, $a1, .LBB0_139 -.LBB0_137: # in Loop: Header=BB0_131 Depth=2 + beq $t5, $a1, .LBB0_140 +.LBB0_138: # in Loop: Header=BB0_132 Depth=2 fadd.s $fa2, $fa2, $fa3 fcmp.cult.s $fcc0, $fa2, $fa0 fsel $fa0, $fa2, $fa0, $fcc0 - bne $s2, $a1, .LBB0_130 - b .LBB0_140 + bne $s2, $a1, .LBB0_131 + b .LBB0_141 .p2align 4, , 16 -.LBB0_138: # in Loop: Header=BB0_131 Depth=2 - bne $t5, $a1, .LBB0_137 -.LBB0_139: # in Loop: Header=BB0_131 Depth=2 +.LBB0_139: # in Loop: Header=BB0_132 Depth=2 + bne $t5, $a1, .LBB0_138 +.LBB0_140: # in Loop: Header=BB0_132 Depth=2 fldx.s $fa4, $s5, $a0 fadd.s $fa4, $fa1, $fa4 fstx.s $fa4, $s5, $a0 @@ -1712,15 +1640,15 @@ Lalignmm_hmout: # @Lalignmm_hmout fadd.s $fa2, $fa2, $fa3 fcmp.cult.s $fcc0, $fa2, $fa0 fsel $fa0, $fa2, $fa0, $fcc0 - bne $s2, $a1, .LBB0_130 -.LBB0_140: # in Loop: Header=BB0_131 Depth=2 + bne $s2, $a1, .LBB0_131 +.LBB0_141: # in Loop: Header=BB0_132 Depth=2 fldx.s $fa2, $s7, $a0 fadd.s $fa2, $fa0, $fa2 fstx.s $fa2, $s7, $a0 - b .LBB0_130 + b .LBB0_131 .p2align 4, , 16 -.LBB0_141: # %._crit_edge75.i - # in Loop: Header=BB0_129 Depth=1 +.LBB0_142: # %._crit_edge75.i + # in Loop: Header=BB0_130 Depth=1 fldx.s $fa0, $a2, $a0 fld.s $fa1, $a4, 0 fadd.s $fa0, $fa0, $fa1 @@ -1729,29 +1657,29 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $t0, $sp, 344 # 8-byte Folded Reload ld.d $t2, $sp, 312 # 8-byte Folded Reload move $a4, $t5 - bne $t5, $a1, .LBB0_143 -# %bb.142: # in Loop: Header=BB0_129 Depth=1 + bne $t5, $a1, .LBB0_144 +# %bb.143: # in Loop: Header=BB0_130 Depth=1 fld.s $fa0, $t0, 0 fadd.s $fa0, $fs0, $fa0 fst.s $fa0, $t0, 0 -.LBB0_143: # in Loop: Header=BB0_129 Depth=1 +.LBB0_144: # in Loop: Header=BB0_130 Depth=1 addi.w $a0, $s3, -1 ld.d $a6, $sp, 360 # 8-byte Folded Reload ld.d $a7, $sp, 352 # 8-byte Folded Reload ld.d $t1, $sp, 424 # 8-byte Folded Reload xvld $xr4, $sp, 256 # 32-byte Folded Reload ld.d $a5, $sp, 248 # 8-byte Folded Reload - bne $s2, $a1, .LBB0_155 -# %bb.144: # %.lr.ph82.preheader.i - # in Loop: Header=BB0_129 Depth=1 + bne $s2, $a1, .LBB0_156 +# %bb.145: # %.lr.ph82.preheader.i + # in Loop: Header=BB0_130 Depth=1 fld.s $fa4, $a7, 4 move $s8, $zero ori $a1, $zero, 2 - ld.d $a2, $sp, 136 # 8-byte Folded Reload - ld.d $a3, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a3, $sp, 136 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_145: # %.lr.ph82.i - # Parent Loop BB0_129 Depth=1 +.LBB0_146: # %.lr.ph82.i + # Parent Loop BB0_130 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa0, $a2, 0 fcmp.clt.s $fcc1, $fa4, $fa0 @@ -1763,15 +1691,15 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a3, $a3, -1 addi.w $a1, $a1, 1 addi.d $a2, $a2, 4 - bnez $a3, .LBB0_145 -# %bb.146: # %.lr.ph88.i.preheader - # in Loop: Header=BB0_129 Depth=1 + bnez $a3, .LBB0_146 +# %bb.147: # %.lr.ph88.i.preheader + # in Loop: Header=BB0_130 Depth=1 move $a1, $zero move $a2, $t0 - ld.d $a3, $sp, 152 # 8-byte Folded Reload + ld.d $a3, $sp, 144 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_147: # %.lr.ph88.i - # Parent Loop BB0_129 Depth=1 +.LBB0_148: # %.lr.ph88.i + # Parent Loop BB0_130 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa0, $a2, 0 fcmp.clt.s $fcc1, $fa4, $fa0 @@ -1783,74 +1711,74 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a3, $a3, -1 addi.w $a1, $a1, 1 addi.d $a2, $a2, 4 - bnez $a3, .LBB0_147 -# %bb.148: # %._crit_edge89.i - # in Loop: Header=BB0_129 Depth=1 + bnez $a3, .LBB0_148 +# %bb.149: # %._crit_edge89.i + # in Loop: Header=BB0_130 Depth=1 slli.d $a1, $s8, 2 fldx.s $fa0, $a7, $a1 addi.w $a5, $s8, -1 - blez $s8, .LBB0_153 -# %bb.149: # in Loop: Header=BB0_129 Depth=1 + blez $s8, .LBB0_154 +# %bb.150: # in Loop: Header=BB0_130 Depth=1 slli.d $a2, $a5, 2 fldx.s $fa1, $a6, $a2 fcmp.cule.s $fcc1, $fa1, $fa0 move $a2, $a5 move $a4, $t5 - bcnez $fcc1, .LBB0_151 -# %bb.150: # in Loop: Header=BB0_129 Depth=1 - ld.d $a2, $sp, 208 # 8-byte Folded Reload + bcnez $fcc1, .LBB0_152 +# %bb.151: # in Loop: Header=BB0_130 Depth=1 + ld.d $a2, $sp, 200 # 8-byte Folded Reload ldx.w $a2, $a2, $a1 fmov.s $fa0, $fa1 -.LBB0_151: # in Loop: Header=BB0_129 Depth=1 +.LBB0_152: # in Loop: Header=BB0_130 Depth=1 fldx.s $fa1, $t0, $a1 fcmp.cule.s $fcc1, $fa1, $fa0 - bcnez $fcc1, .LBB0_154 -.LBB0_152: # in Loop: Header=BB0_129 Depth=1 - ld.d $a2, $sp, 216 # 8-byte Folded Reload + bcnez $fcc1, .LBB0_155 +.LBB0_153: # in Loop: Header=BB0_130 Depth=1 + ld.d $a2, $sp, 208 # 8-byte Folded Reload ldx.w $t7, $a2, $a1 - b .LBB0_155 -.LBB0_153: # in Loop: Header=BB0_129 Depth=1 + b .LBB0_156 +.LBB0_154: # in Loop: Header=BB0_130 Depth=1 move $a2, $a5 move $a4, $t5 fldx.s $fa1, $t0, $a1 fcmp.cule.s $fcc1, $fa1, $fa0 - bceqz $fcc1, .LBB0_152 -.LBB0_154: # in Loop: Header=BB0_129 Depth=1 + bceqz $fcc1, .LBB0_153 +.LBB0_155: # in Loop: Header=BB0_130 Depth=1 move $t7, $a0 move $a5, $a2 -.LBB0_155: # in Loop: Header=BB0_129 Depth=1 +.LBB0_156: # in Loop: Header=BB0_130 Depth=1 movcf2gr $a1, $fcc0 masknez $a2, $t2, $a1 maskeqz $a1, $s2, $a1 bstrpick.d $a3, $t7, 31, 0 or $t5, $a1, $a2 - bne $a4, $a3, .LBB0_127 -# %bb.156: # in Loop: Header=BB0_129 Depth=1 - beqz $s8, .LBB0_159 -# %bb.157: # in Loop: Header=BB0_129 Depth=1 + bne $a4, $a3, .LBB0_128 +# %bb.157: # in Loop: Header=BB0_130 Depth=1 + beqz $s8, .LBB0_160 +# %bb.158: # in Loop: Header=BB0_130 Depth=1 move $a1, $t8 move $a2, $t1 - bge $s8, $t1, .LBB0_128 -# %bb.158: # in Loop: Header=BB0_129 Depth=1 + bge $s8, $t1, .LBB0_129 +# %bb.159: # in Loop: Header=BB0_130 Depth=1 slli.d $a0, $a5, 2 - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload ldx.w $s3, $a1, $a0 - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 64 # 8-byte Folded Reload ldx.w $a2, $a1, $a0 move $a0, $t7 move $a1, $a5 - b .LBB0_128 -.LBB0_159: # in Loop: Header=BB0_129 Depth=1 + b .LBB0_129 +.LBB0_160: # in Loop: Header=BB0_130 Depth=1 move $a1, $zero addi.w $a0, $t5, -1 ori $a2, $zero, 1 move $s3, $t5 - b .LBB0_128 -.LBB0_160: # %.preheader2.preheader.i + b .LBB0_129 +.LBB0_161: # %.preheader2.preheader.i move $t6, $zero - ld.d $a1, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 160 # 8-byte Folded Reload bstrpick.d $a1, $a1, 30, 0 - ld.d $a4, $sp, 112 # 8-byte Folded Reload + ld.d $a4, $sp, 104 # 8-byte Folded Reload andi $a2, $a4, 12 bstrpick.d $a3, $a4, 30, 4 slli.d $a3, $a3, 4 @@ -1864,47 +1792,47 @@ Lalignmm_hmout: # @Lalignmm_hmout ori $t0, $zero, 16 ld.d $s7, $sp, 88 # 8-byte Folded Reload ld.d $s8, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 112 # 8-byte Folded Reload ld.d $s5, $sp, 120 # 8-byte Folded Reload - ld.d $s4, $sp, 128 # 8-byte Folded Reload - ld.d $s6, $sp, 72 # 8-byte Folded Reload + ld.d $s4, $sp, 56 # 8-byte Folded Reload ld.d $a0, $sp, 40 # 8-byte Folded Reload ld.d $t8, $sp, 16 # 8-byte Folded Reload ld.d $fp, $sp, 368 # 8-byte Folded Reload - b .LBB0_162 + b .LBB0_163 .p2align 4, , 16 -.LBB0_161: # %._crit_edge109.i - # in Loop: Header=BB0_162 Depth=1 +.LBB0_162: # %._crit_edge109.i + # in Loop: Header=BB0_163 Depth=1 addi.d $t6, $t6, 1 - beq $t6, $a1, .LBB0_175 -.LBB0_162: # %iter.check663 + beq $t6, $a1, .LBB0_176 +.LBB0_163: # %iter.check663 # =>This Loop Header: Depth=1 - # Child Loop BB0_167 Depth 2 - # Child Loop BB0_171 Depth 2 - # Child Loop BB0_174 Depth 2 + # Child Loop BB0_168 Depth 2 + # Child Loop BB0_172 Depth 2 + # Child Loop BB0_175 Depth 2 slli.d $t2, $t6, 3 ldx.d $t1, $fp, $t2 ldx.d $t2, $t8, $t2 move $t4, $zero - bltu $ra, $a6, .LBB0_173 -# %bb.163: # %iter.check663 - # in Loop: Header=BB0_162 Depth=1 + bltu $ra, $a6, .LBB0_174 +# %bb.164: # %iter.check663 + # in Loop: Header=BB0_163 Depth=1 sub.d $t3, $t2, $t1 - bltu $t3, $a7, .LBB0_173 -# %bb.164: # %vector.main.loop.iter.check665 - # in Loop: Header=BB0_162 Depth=1 - bgeu $ra, $t0, .LBB0_166 -# %bb.165: # in Loop: Header=BB0_162 Depth=1 + bltu $t3, $a7, .LBB0_174 +# %bb.165: # %vector.main.loop.iter.check665 + # in Loop: Header=BB0_163 Depth=1 + bgeu $ra, $t0, .LBB0_167 +# %bb.166: # in Loop: Header=BB0_163 Depth=1 move $t5, $zero - b .LBB0_170 + b .LBB0_171 .p2align 4, , 16 -.LBB0_166: # %vector.body671.preheader - # in Loop: Header=BB0_162 Depth=1 +.LBB0_167: # %vector.body671.preheader + # in Loop: Header=BB0_163 Depth=1 addi.d $t3, $t2, 32 addi.d $t4, $t1, 32 move $t5, $a3 .p2align 4, , 16 -.LBB0_167: # %vector.body671 - # Parent Loop BB0_162 Depth=1 +.LBB0_168: # %vector.body671 + # Parent Loop BB0_163 Depth=1 # => This Inner Loop Header: Depth=2 xvld $xr2, $t4, -32 xvld $xr3, $t4, 0 @@ -1915,23 +1843,23 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $t5, $t5, -16 addi.d $t3, $t3, 64 addi.d $t4, $t4, 64 - bnez $t5, .LBB0_167 -# %bb.168: # %middle.block676 - # in Loop: Header=BB0_162 Depth=1 - beq $ra, $a3, .LBB0_161 -# %bb.169: # %vec.epilog.iter.check680 - # in Loop: Header=BB0_162 Depth=1 + bnez $t5, .LBB0_168 +# %bb.169: # %middle.block676 + # in Loop: Header=BB0_163 Depth=1 + beq $ra, $a3, .LBB0_162 +# %bb.170: # %vec.epilog.iter.check680 + # in Loop: Header=BB0_163 Depth=1 move $t5, $a3 move $t4, $a3 - beqz $a2, .LBB0_173 -.LBB0_170: # %vec.epilog.ph682 - # in Loop: Header=BB0_162 Depth=1 + beqz $a2, .LBB0_174 +.LBB0_171: # %vec.epilog.ph682 + # in Loop: Header=BB0_163 Depth=1 add.d $t3, $a5, $t5 alsl.d $t4, $t5, $t2, 2 alsl.d $t5, $t5, $t1, 2 .p2align 4, , 16 -.LBB0_171: # %vec.epilog.vector.body687 - # Parent Loop BB0_162 Depth=1 +.LBB0_172: # %vec.epilog.vector.body687 + # Parent Loop BB0_163 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr2, $t5, 0 vfdiv.s $vr2, $vr2, $vr1 @@ -1939,19 +1867,19 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $t3, $t3, 4 addi.d $t4, $t4, 16 addi.d $t5, $t5, 16 - bnez $t3, .LBB0_171 -# %bb.172: # %vec.epilog.middle.block691 - # in Loop: Header=BB0_162 Depth=1 + bnez $t3, .LBB0_172 +# %bb.173: # %vec.epilog.middle.block691 + # in Loop: Header=BB0_163 Depth=1 move $t4, $a4 - beq $ra, $a4, .LBB0_161 -.LBB0_173: # %vec.epilog.scalar.ph679.preheader - # in Loop: Header=BB0_162 Depth=1 + beq $ra, $a4, .LBB0_162 +.LBB0_174: # %vec.epilog.scalar.ph679.preheader + # in Loop: Header=BB0_163 Depth=1 sub.d $t3, $ra, $t4 alsl.d $t2, $t4, $t2, 2 alsl.d $t1, $t4, $t1, 2 .p2align 4, , 16 -.LBB0_174: # %vec.epilog.scalar.ph679 - # Parent Loop BB0_162 Depth=1 +.LBB0_175: # %vec.epilog.scalar.ph679 + # Parent Loop BB0_163 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa2, $t1, 0 fdiv.s $fa2, $fa2, $fa4 @@ -1959,9 +1887,9 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $t3, $t3, -1 addi.d $t2, $t2, 4 addi.d $t1, $t1, 4 - bnez $t3, .LBB0_174 - b .LBB0_161 -.LBB0_175: # %._crit_edge112.i + bnez $t3, .LBB0_175 + b .LBB0_162 +.LBB0_176: # %._crit_edge112.i pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $sp, 48 # 8-byte Folded Reload @@ -1970,7 +1898,7 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $a0, $sp, 416 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $sp, 352 # 8-byte Folded Reload @@ -1982,16 +1910,16 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $a0, $sp, 360 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 ld.d $a0, $sp, 32 # 8-byte Folded Reload @@ -2000,7 +1928,7 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $a0, $sp, 24 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $sp, 448 # 8-byte Folded Reload @@ -2018,17 +1946,17 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $a0, $sp, 320 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 -.LBB0_176: # %MSalignmm_rec.exit +.LBB0_177: # %MSalignmm_rec.exit ld.d $a0, $sp, 232 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $sp, 392 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $sp, 384 # 8-byte Folded Reload @@ -2040,53 +1968,53 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $a0, $sp, 96 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + move $a0, $s4 pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - move $a0, $s6 + ld.d $a0, $sp, 224 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 ld.d $a0, $s8, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $fp, $a0 - blez $s5, .LBB0_180 -# %bb.177: # %.lr.ph199 - ld.d $a0, $s4, 0 + blez $s6, .LBB0_181 +# %bb.178: # %.lr.ph199 + ld.d $a0, $s5, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $s0, $zero addi.w $s1, $a0, 0 - move $s2, $s5 + move $s2, $s6 .p2align 4, , 16 -.LBB0_178: # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s4, 0 +.LBB0_179: # =>This Inner Loop Header: Depth=1 + ld.d $a0, $s5, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - bne $a0, $s1, .LBB0_257 -# %bb.179: # in Loop: Header=BB0_178 Depth=1 + bne $a0, $s1, .LBB0_258 +# %bb.180: # in Loop: Header=BB0_179 Depth=1 addi.w $s0, $s0, 1 addi.d $s2, $s2, -1 - addi.d $s4, $s4, 8 - bnez $s2, .LBB0_178 -.LBB0_180: # %.preheader - blez $s7, .LBB0_184 -# %bb.181: # %.lr.ph201 + addi.d $s5, $s5, 8 + bnez $s2, .LBB0_179 +.LBB0_181: # %.preheader + blez $s7, .LBB0_185 +# %bb.182: # %.lr.ph201 move $s0, $zero addi.w $fp, $fp, 0 move $s1, $s7 .p2align 4, , 16 -.LBB0_182: # =>This Inner Loop Header: Depth=1 +.LBB0_183: # =>This Inner Loop Header: Depth=1 ld.d $a0, $s8, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - bne $a0, $fp, .LBB0_258 -# %bb.183: # in Loop: Header=BB0_182 Depth=1 + bne $a0, $fp, .LBB0_259 +# %bb.184: # in Loop: Header=BB0_183 Depth=1 addi.w $s0, $s0, 1 addi.d $s1, $s1, -1 addi.d $s8, $s8, 8 - bnez $s1, .LBB0_182 -.LBB0_184: # %._crit_edge202 + bnez $s1, .LBB0_183 +.LBB0_185: # %._crit_edge202 movgr2fr.w $fa0, $zero fld.d $fs0, $sp, 464 # 8-byte Folded Reload ld.d $s8, $sp, 472 # 8-byte Folded Reload @@ -2102,26 +2030,26 @@ Lalignmm_hmout: # @Lalignmm_hmout ld.d $ra, $sp, 552 # 8-byte Folded Reload addi.d $sp, $sp, 560 ret -.LBB0_185: # %vector.memcheck432 +.LBB0_186: # %vector.memcheck432 alsl.d $a3, $a0, $t1, 2 addi.d $a1, $a1, -4 sltu $a1, $t2, $a1 sltu $a4, $s3, $a3 and $a1, $a1, $a4 - bnez $a1, .LBB0_91 -# %bb.186: # %vector.memcheck432 + bnez $a1, .LBB0_92 +# %bb.187: # %vector.memcheck432 addi.d $a1, $s6, 8 sltu $a1, $t2, $a1 sltu $a3, $s5, $a3 and $a1, $a1, $a3 - bnez $a1, .LBB0_91 -# %bb.187: # %vector.main.loop.iter.check449 + bnez $a1, .LBB0_92 +# %bb.188: # %vector.main.loop.iter.check449 ori $a1, $zero, 17 - bge $t6, $a1, .LBB0_222 -# %bb.188: + bge $t6, $a1, .LBB0_223 +# %bb.189: move $a1, $zero - b .LBB0_226 -.LBB0_189: # %vector.memcheck480 + b .LBB0_227 +.LBB0_190: # %vector.memcheck480 slli.d $a0, $a5, 2 alsl.d $a1, $a5, $s3, 2 addi.d $a3, $a0, 4 @@ -2130,52 +2058,52 @@ Lalignmm_hmout: # @Lalignmm_hmout sltu $a4, $a2, $a1 and $a4, $a0, $a4 move $a0, $zero - ld.d $t1, $sp, 224 # 8-byte Folded Reload - ld.d $t2, $sp, 112 # 8-byte Folded Reload + ld.d $t1, $sp, 216 # 8-byte Folded Reload + ld.d $t2, $sp, 104 # 8-byte Folded Reload ld.d $s2, $sp, 344 # 8-byte Folded Reload ld.d $t0, $sp, 368 # 8-byte Folded Reload - bnez $a4, .LBB0_206 -# %bb.190: # %vector.memcheck480 + bnez $a4, .LBB0_207 +# %bb.191: # %vector.memcheck480 add.d $a3, $s6, $a3 sltu $a3, $s3, $a3 sltu $a1, $s5, $a1 and $a1, $a3, $a1 ld.d $t3, $sp, 360 # 8-byte Folded Reload ld.d $t4, $sp, 352 # 8-byte Folded Reload - ld.d $t6, $sp, 200 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload + ld.d $t6, $sp, 192 # 8-byte Folded Reload + ld.d $t7, $sp, 176 # 8-byte Folded Reload ld.d $fp, $sp, 408 # 8-byte Folded Reload move $a7, $s8 - bnez $a1, .LBB0_110 -# %bb.191: # %vector.main.loop.iter.check496 + bnez $a1, .LBB0_111 +# %bb.192: # %vector.main.loop.iter.check496 ori $a0, $zero, 16 - bgeu $t5, $a0, .LBB0_229 -# %bb.192: + bgeu $t5, $a0, .LBB0_230 +# %bb.193: move $a0, $zero - b .LBB0_233 -.LBB0_193: # %vector.memcheck528 + b .LBB0_234 +.LBB0_194: # %vector.memcheck528 addi.d $a4, $fp, 4 add.d $a3, $t1, $a4 sltu $a3, $a7, $a3 sltu $a5, $a0, $a1 and $a5, $a3, $a5 move $a3, $zero - bnez $a5, .LBB0_114 -# %bb.194: # %vector.memcheck528 + bnez $a5, .LBB0_115 +# %bb.195: # %vector.memcheck528 addi.d $a5, $s7, 4 add.d $a4, $s7, $a4 sltu $a4, $a7, $a4 sltu $a5, $a5, $a1 and $a4, $a4, $a5 - bnez $a4, .LBB0_114 -# %bb.195: # %vector.main.loop.iter.check545 + bnez $a4, .LBB0_115 +# %bb.196: # %vector.main.loop.iter.check545 ori $a3, $zero, 17 ld.d $a4, $sp, 424 # 8-byte Folded Reload - bgeu $a4, $a3, .LBB0_236 -# %bb.196: + bgeu $a4, $a3, .LBB0_237 +# %bb.197: move $a3, $zero - b .LBB0_240 -.LBB0_197: # %vector.memcheck577 + b .LBB0_241 +.LBB0_198: # %vector.memcheck577 alsl.d $a4, $t7, $a2, 2 addi.d $a5, $fp, 4 add.d $a3, $t1, $a5 @@ -2183,22 +2111,22 @@ Lalignmm_hmout: # @Lalignmm_hmout sltu $a6, $a0, $a4 and $a6, $a3, $a6 move $a3, $zero - bnez $a6, .LBB0_120 -# %bb.198: # %vector.memcheck577 + bnez $a6, .LBB0_121 +# %bb.199: # %vector.memcheck577 addi.d $a6, $s7, 4 add.d $a5, $s7, $a5 sltu $a5, $a2, $a5 sltu $a4, $a6, $a4 and $a4, $a5, $a4 - bnez $a4, .LBB0_120 -# %bb.199: # %vector.main.loop.iter.check594 + bnez $a4, .LBB0_121 +# %bb.200: # %vector.main.loop.iter.check594 ori $a3, $zero, 17 ld.d $a4, $sp, 424 # 8-byte Folded Reload - bgeu $a4, $a3, .LBB0_243 -# %bb.200: + bgeu $a4, $a3, .LBB0_244 +# %bb.201: move $a3, $zero - b .LBB0_247 -.LBB0_201: # %vector.memcheck626 + b .LBB0_248 +.LBB0_202: # %vector.memcheck626 sub.d $a5, $a5, $t2 nor $a5, $a5, $zero bstrpick.d $a5, $a5, 31, 0 @@ -2213,14 +2141,14 @@ Lalignmm_hmout: # @Lalignmm_hmout sltu $a6, $a6, $s0 and $a1, $a1, $a6 ld.d $t8, $sp, 304 # 8-byte Folded Reload - bnez $a1, .LBB0_207 -# %bb.202: # %vector.memcheck626 + bnez $a1, .LBB0_208 +# %bb.203: # %vector.memcheck626 sltu $a0, $a5, $a0 sltu $a1, $a3, $s0 and $a1, $a0, $a1 move $a0, $t7 - bnez $a1, .LBB0_124 -# %bb.203: # %vector.ph641 + bnez $a1, .LBB0_125 +# %bb.204: # %vector.ph641 bstrpick.d $a0, $a4, 31, 0 addi.d $a1, $a0, 1 bstrpick.d $a0, $a1, 32, 3 @@ -2236,7 +2164,7 @@ Lalignmm_hmout: # @Lalignmm_hmout add.d $a7, $t0, $a7 move $t0, $a4 .p2align 4, , 16 -.LBB0_204: # %vector.body646 +.LBB0_205: # %vector.body646 # =>This Inner Loop Header: Depth=1 xvld $xr2, $a7, 0 xvfadd.s $xr2, $xr2, $xr1 @@ -2246,20 +2174,20 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a5, $a5, -32 addi.d $a6, $a6, -32 addi.d $a7, $a7, -32 - bnez $t0, .LBB0_204 -# %bb.205: # %middle.block655 + bnez $t0, .LBB0_205 +# %bb.206: # %middle.block655 move $a7, $s8 - bne $a1, $a4, .LBB0_124 - b .LBB0_126 -.LBB0_206: + bne $a1, $a4, .LBB0_125 + b .LBB0_127 +.LBB0_207: ld.d $t3, $sp, 360 # 8-byte Folded Reload ld.d $t4, $sp, 352 # 8-byte Folded Reload - ld.d $t6, $sp, 200 # 8-byte Folded Reload - b .LBB0_109 -.LBB0_207: + ld.d $t6, $sp, 192 # 8-byte Folded Reload + b .LBB0_110 +.LBB0_208: move $a0, $t7 - b .LBB0_124 -.LBB0_208: # %vector.ph323 + b .LBB0_125 +.LBB0_209: # %vector.ph323 andi $a2, $a0, 12 move $a1, $a0 xvldrepl.w $xr0, $s6, 0 @@ -2268,7 +2196,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $s4, 36 move $a5, $a1 .p2align 4, , 16 -.LBB0_209: # %vector.body326 +.LBB0_210: # %vector.body326 # =>This Inner Loop Header: Depth=1 xvld $xr1, $a3, -32 xvld $xr2, $a3, 0 @@ -2283,12 +2211,12 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a3, $a3, 64 addi.d $a5, $a5, -16 addi.d $a4, $a4, 64 - bnez $a5, .LBB0_209 -# %bb.210: # %middle.block335 - beq $a0, $a1, .LBB0_48 -# %bb.211: # %vec.epilog.iter.check - beqz $a2, .LBB0_251 -.LBB0_212: # %vec.epilog.ph + bnez $a5, .LBB0_210 +# %bb.211: # %middle.block335 + beq $a0, $a1, .LBB0_49 +# %bb.212: # %vec.epilog.iter.check + beqz $a2, .LBB0_252 +.LBB0_213: # %vec.epilog.ph move $a2, $a0 bstrins.d $a2, $zero, 1, 0 ori $a3, $zero, 1 @@ -2300,7 +2228,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, 4 sub.d $a1, $a1, $a2 .p2align 4, , 16 -.LBB0_213: # %vec.epilog.vector.body +.LBB0_214: # %vec.epilog.vector.body # =>This Inner Loop Header: Depth=1 vld $vr1, $a3, 0 vld $vr2, $a4, 0 @@ -2310,11 +2238,11 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a3, $a3, 16 addi.d $a1, $a1, 4 addi.d $a4, $a4, 16 - bnez $a1, .LBB0_213 -# %bb.214: # %vec.epilog.middle.block - bne $a0, $a2, .LBB0_46 - b .LBB0_48 -.LBB0_215: # %vector.ph365 + bnez $a1, .LBB0_214 +# %bb.215: # %vec.epilog.middle.block + bne $a0, $a2, .LBB0_47 + b .LBB0_49 +.LBB0_216: # %vector.ph365 addi.d $a3, $a0, -1 andi $a2, $a3, 12 xvldrepl.w $xr0, $s7, 0 @@ -2323,7 +2251,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a5, $s3, 36 move $a6, $a3 .p2align 4, , 16 -.LBB0_216: # %vector.body368 +.LBB0_217: # %vector.body368 # =>This Inner Loop Header: Depth=1 xvld $xr1, $a4, -32 xvld $xr2, $a4, 0 @@ -2338,13 +2266,13 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, 64 addi.d $a6, $a6, -16 addi.d $a5, $a5, 64 - bnez $a6, .LBB0_216 -# %bb.217: # %middle.block377 + bnez $a6, .LBB0_217 +# %bb.218: # %middle.block377 addi.d $a4, $a0, -1 - beq $a4, $a3, .LBB0_51 -# %bb.218: # %vec.epilog.iter.check381 - beqz $a2, .LBB0_252 -.LBB0_219: # %vec.epilog.ph383 + beq $a4, $a3, .LBB0_52 +# %bb.219: # %vec.epilog.iter.check381 + beqz $a2, .LBB0_253 +.LBB0_220: # %vec.epilog.ph383 addi.d $a2, $a0, -1 move $a4, $a2 bstrins.d $a4, $zero, 1, 0 @@ -2356,7 +2284,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a6, $a6, 4 sub.d $a3, $a3, $a4 .p2align 4, , 16 -.LBB0_220: # %vec.epilog.vector.body386 +.LBB0_221: # %vec.epilog.vector.body386 # =>This Inner Loop Header: Depth=1 vld $vr1, $a5, 0 vld $vr2, $a6, 0 @@ -2366,12 +2294,12 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a5, $a5, 16 addi.d $a3, $a3, 4 addi.d $a6, $a6, 16 - bnez $a3, .LBB0_220 -# %bb.221: # %vec.epilog.middle.block394 + bnez $a3, .LBB0_221 +# %bb.222: # %vec.epilog.middle.block394 addi.d $a3, $a0, -1 - bne $a3, $a4, .LBB0_49 - b .LBB0_51 -.LBB0_222: # %vector.ph450 + bne $a3, $a4, .LBB0_50 + b .LBB0_52 +.LBB0_223: # %vector.ph450 addi.d $a1, $a0, -1 andi $a2, $a1, 12 bstrins.d $a1, $zero, 3, 0 @@ -2383,7 +2311,7 @@ Lalignmm_hmout: # @Lalignmm_hmout xvrepli.b $xr1, 0 move $a6, $a1 .p2align 4, , 16 -.LBB0_223: # %vector.body453 +.LBB0_224: # %vector.body453 # =>This Inner Loop Header: Depth=1 xvld $xr2, $a3, -32 xvld $xr3, $a3, 0 @@ -2397,13 +2325,13 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a3, $a3, 64 addi.d $a4, $a4, 64 addi.d $a5, $a5, 64 - bnez $a6, .LBB0_223 -# %bb.224: # %middle.block460 + bnez $a6, .LBB0_224 +# %bb.225: # %middle.block460 addi.d $a3, $a0, -1 - beq $a3, $a1, .LBB0_93 -# %bb.225: # %vec.epilog.iter.check464 - beqz $a2, .LBB0_253 -.LBB0_226: # %vec.epilog.ph466 + beq $a3, $a1, .LBB0_94 +# %bb.226: # %vec.epilog.iter.check464 + beqz $a2, .LBB0_254 +.LBB0_227: # %vec.epilog.ph466 addi.d $a2, $a0, -1 move $a3, $a2 bstrins.d $a3, $zero, 1, 0 @@ -2419,7 +2347,7 @@ Lalignmm_hmout: # @Lalignmm_hmout sub.d $a1, $a1, $a3 vrepli.b $vr1, 0 .p2align 4, , 16 -.LBB0_227: # %vec.epilog.vector.body469 +.LBB0_228: # %vec.epilog.vector.body469 # =>This Inner Loop Header: Depth=1 vld $vr2, $a4, 0 vfadd.s $vr2, $vr2, $vr0 @@ -2429,12 +2357,12 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a5, $a5, 16 addi.d $a1, $a1, 4 addi.d $a6, $a6, 16 - bnez $a1, .LBB0_227 -# %bb.228: # %vec.epilog.middle.block476 + bnez $a1, .LBB0_228 +# %bb.229: # %vec.epilog.middle.block476 addi.d $a1, $a0, -1 - bne $a1, $a3, .LBB0_91 - b .LBB0_93 -.LBB0_229: # %vector.ph497 + bne $a1, $a3, .LBB0_92 + b .LBB0_94 +.LBB0_230: # %vector.ph497 andi $a1, $a5, 12 bstrpick.d $a0, $a5, 30, 4 xvldrepl.w $xr0, $a2, 0 @@ -2443,7 +2371,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $s3, 32 move $a5, $a0 .p2align 4, , 16 -.LBB0_230: # %vector.body500 +.LBB0_231: # %vector.body500 # =>This Inner Loop Header: Depth=1 xvld $xr1, $a3, -32 xvld $xr2, $a3, 0 @@ -2458,13 +2386,13 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a3, $a3, 64 addi.d $a5, $a5, -16 addi.d $a4, $a4, 64 - bnez $a5, .LBB0_230 -# %bb.231: # %middle.block509 - beq $a0, $ra, .LBB0_112 -# %bb.232: # %vec.epilog.iter.check513 + bnez $a5, .LBB0_231 +# %bb.232: # %middle.block509 + beq $a0, $ra, .LBB0_113 +# %bb.233: # %vec.epilog.iter.check513 move $a5, $ra - beqz $a1, .LBB0_110 -.LBB0_233: # %vec.epilog.ph515 + beqz $a1, .LBB0_111 +.LBB0_234: # %vec.epilog.ph515 move $a4, $a0 bstrpick.d $a0, $a5, 30, 2 slli.d $a0, $a0, 2 @@ -2474,7 +2402,7 @@ Lalignmm_hmout: # @Lalignmm_hmout sub.d $a3, $a4, $a0 alsl.d $a4, $a4, $s3, 2 .p2align 4, , 16 -.LBB0_234: # %vec.epilog.vector.body518 +.LBB0_235: # %vec.epilog.vector.body518 # =>This Inner Loop Header: Depth=1 vld $vr1, $a1, 0 vld $vr2, $a4, 0 @@ -2484,11 +2412,11 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a1, $a1, 16 addi.d $a3, $a3, 4 addi.d $a4, $a4, 16 - bnez $a3, .LBB0_234 -# %bb.235: # %vec.epilog.middle.block525 - bne $a0, $a5, .LBB0_110 - b .LBB0_112 -.LBB0_236: # %vector.ph546 + bnez $a3, .LBB0_235 +# %bb.236: # %vec.epilog.middle.block525 + bne $a0, $a5, .LBB0_111 + b .LBB0_113 +.LBB0_237: # %vector.ph546 andi $a4, $t7, 12 bstrpick.d $a3, $t7, 30, 4 xvldrepl.w $xr0, $a0, 0 @@ -2497,7 +2425,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a6, $a7, 32 move $a7, $a3 .p2align 4, , 16 -.LBB0_237: # %vector.body549 +.LBB0_238: # %vector.body549 # =>This Inner Loop Header: Depth=1 xvld $xr1, $a5, -32 xvld $xr2, $a5, 0 @@ -2512,13 +2440,13 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a5, $a5, 64 addi.d $a7, $a7, -16 addi.d $a6, $a6, 64 - bnez $a7, .LBB0_237 -# %bb.238: # %middle.block558 + bnez $a7, .LBB0_238 +# %bb.239: # %middle.block558 move $a7, $s8 - beq $a3, $t7, .LBB0_116 -# %bb.239: # %vec.epilog.iter.check562 - beqz $a4, .LBB0_114 -.LBB0_240: # %vec.epilog.ph564 + beq $a3, $t7, .LBB0_117 +# %bb.240: # %vec.epilog.iter.check562 + beqz $a4, .LBB0_115 +.LBB0_241: # %vec.epilog.ph564 move $a6, $a3 bstrpick.d $a3, $t7, 30, 2 slli.d $a3, $a3, 2 @@ -2528,7 +2456,7 @@ Lalignmm_hmout: # @Lalignmm_hmout sub.d $a5, $a6, $a3 alsl.d $a6, $a6, $a7, 2 .p2align 4, , 16 -.LBB0_241: # %vec.epilog.vector.body567 +.LBB0_242: # %vec.epilog.vector.body567 # =>This Inner Loop Header: Depth=1 vld $vr1, $a4, 0 vld $vr2, $a6, 0 @@ -2538,11 +2466,11 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, 16 addi.d $a5, $a5, 4 addi.d $a6, $a6, 16 - bnez $a5, .LBB0_241 -# %bb.242: # %vec.epilog.middle.block574 - bne $a3, $t7, .LBB0_114 - b .LBB0_116 -.LBB0_243: # %vector.ph595 + bnez $a5, .LBB0_242 +# %bb.243: # %vec.epilog.middle.block574 + bne $a3, $t7, .LBB0_115 + b .LBB0_117 +.LBB0_244: # %vector.ph595 andi $a4, $t7, 12 bstrpick.d $a3, $t7, 30, 4 xvldrepl.w $xr0, $a0, 0 @@ -2551,7 +2479,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a6, $a2, 32 move $a7, $a3 .p2align 4, , 16 -.LBB0_244: # %vector.body598 +.LBB0_245: # %vector.body598 # =>This Inner Loop Header: Depth=1 xvld $xr1, $a5, -32 xvld $xr2, $a5, 0 @@ -2566,13 +2494,13 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a5, $a5, 64 addi.d $a7, $a7, -16 addi.d $a6, $a6, 64 - bnez $a7, .LBB0_244 -# %bb.245: # %middle.block607 + bnez $a7, .LBB0_245 +# %bb.246: # %middle.block607 move $a7, $s8 - beq $a3, $t7, .LBB0_122 -# %bb.246: # %vec.epilog.iter.check611 - beqz $a4, .LBB0_120 -.LBB0_247: # %vec.epilog.ph613 + beq $a3, $t7, .LBB0_123 +# %bb.247: # %vec.epilog.iter.check611 + beqz $a4, .LBB0_121 +.LBB0_248: # %vec.epilog.ph613 move $a6, $a3 bstrpick.d $a3, $t7, 30, 2 slli.d $a3, $a3, 2 @@ -2582,7 +2510,7 @@ Lalignmm_hmout: # @Lalignmm_hmout sub.d $a5, $a6, $a3 alsl.d $a6, $a6, $a2, 2 .p2align 4, , 16 -.LBB0_248: # %vec.epilog.vector.body616 +.LBB0_249: # %vec.epilog.vector.body616 # =>This Inner Loop Header: Depth=1 vld $vr1, $a4, 0 vld $vr2, $a6, 0 @@ -2592,46 +2520,46 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a4, $a4, 16 addi.d $a5, $a5, 4 addi.d $a6, $a6, 16 - bnez $a5, .LBB0_248 -# %bb.249: # %vec.epilog.middle.block623 - bne $a3, $t7, .LBB0_120 - b .LBB0_122 -.LBB0_250: - addi.d $a3, $a4, 1 - b .LBB0_88 + bnez $a5, .LBB0_249 +# %bb.250: # %vec.epilog.middle.block623 + bne $a3, $t7, .LBB0_121 + b .LBB0_123 .LBB0_251: - addi.d $fp, $a1, 1 - b .LBB0_46 + addi.d $a3, $a4, 1 + b .LBB0_89 .LBB0_252: - addi.d $a2, $a3, 1 - b .LBB0_49 + addi.d $fp, $a1, 1 + b .LBB0_47 .LBB0_253: - addi.d $a2, $a1, 1 - b .LBB0_91 + addi.d $a2, $a3, 1 + b .LBB0_50 .LBB0_254: + addi.d $a2, $a1, 1 + b .LBB0_92 +.LBB0_255: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $s0, $a0, %got_pc_lo12(stderr) ld.d $a0, $s0, 0 pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) move $a2, $fp - ld.d $a3, $sp, 120 # 8-byte Folded Reload + ld.d $a3, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a3, $s0, 0 - b .LBB0_256 -.LBB0_255: + b .LBB0_257 +.LBB0_256: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $fp, $a0, %got_pc_lo12(stderr) ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) - move $a2, $s4 + move $a2, $s5 ld.d $a3, $sp, 88 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a3, $fp, 0 -.LBB0_256: +.LBB0_257: pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) ori $a1, $zero, 27 @@ -2641,16 +2569,16 @@ Lalignmm_hmout: # @Lalignmm_hmout ori $a0, $zero, 1 pcaddu18i $ra, %call36(exit) jirl $ra, $ra, 0 -.LBB0_257: +.LBB0_258: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $fp, $a0, %got_pc_lo12(stderr) ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) move $a2, $s0 - move $a3, $s5 - b .LBB0_259 -.LBB0_258: + move $a3, $s6 + b .LBB0_260 +.LBB0_259: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $fp, $a0, %got_pc_lo12(stderr) ld.d $a0, $fp, 0 @@ -2658,7 +2586,7 @@ Lalignmm_hmout: # @Lalignmm_hmout addi.d $a1, $a1, %pc_lo12(.L.str.2) move $a2, $s0 move $a3, $s7 -.LBB0_259: +.LBB0_260: pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a3, $fp, 0 @@ -2674,7 +2602,19 @@ Lalignmm_hmout: # @Lalignmm_hmout .Lfunc_end0: .size Lalignmm_hmout, .Lfunc_end0-Lalignmm_hmout # -- End function - .globl Lalign2m2m_hmout # -- Begin function Lalign2m2m_hmout + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 # -- Begin function Lalign2m2m_hmout +.LCPI1_0: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 + .text + .globl Lalign2m2m_hmout .p2align 5 .type Lalign2m2m_hmout,@function Lalign2m2m_hmout: # @Lalign2m2m_hmout @@ -2693,18 +2633,18 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout st.d $s8, $sp, 472 # 8-byte Folded Spill fst.d $fs0, $sp, 464 # 8-byte Folded Spill ld.d $s0, $sp, 568 - ld.d $s3, $sp, 560 + ld.d $s4, $sp, 560 move $s1, $a7 - move $s2, $a6 + move $s3, $a6 st.d $a5, $sp, 456 # 8-byte Folded Spill - move $s5, $a4 + move $s6, $a4 st.d $a3, $sp, 448 # 8-byte Folded Spill move $s7, $a2 move $fp, $a1 move $s8, $a0 pcalau12i $a0, %got_pc_hi20(penalty) ld.d $a0, $a0, %got_pc_lo12(penalty) - ld.w $s6, $a0, 0 + ld.w $s2, $a0, 0 ld.d $a0, $s8, 0 pcaddu18i $ra, %call36(seqlen) jirl $ra, $ra, 0 @@ -2721,15 +2661,15 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout move $a0, $a1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - move $s4, $a0 + move $s5, $a0 add.d $a0, $s8, $a0 addi.w $fp, $a0, 200 - move $a0, $s3 + move $a0, $s4 move $a1, $fp pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 st.d $a0, $sp, 56 # 8-byte Folded Spill - st.d $s0, $sp, 120 # 8-byte Folded Spill + st.d $s0, $sp, 112 # 8-byte Folded Spill move $a0, $s0 move $a1, $fp pcaddu18i $ra, %call36(AllocateCharMtx) @@ -2745,8 +2685,8 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 st.d $a0, $sp, 232 # 8-byte Folded Spill - st.d $s4, $sp, 104 # 8-byte Folded Spill - addi.w $fp, $s4, 102 + st.d $s5, $sp, 104 # 8-byte Folded Spill + addi.w $fp, $s5, 102 move $a0, $fp pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -2772,164 +2712,160 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout jirl $ra, $ra, 0 st.d $a0, $sp, 376 # 8-byte Folded Spill st.d $s8, $sp, 160 # 8-byte Folded Spill - addi.w $s4, $s8, 0 - st.d $s3, $sp, 112 # 8-byte Folded Spill - blez $s3, .LBB1_4 + addi.w $s5, $s8, 0 + blez $s4, .LBB1_4 # %bb.1: # %.lr.ph move $fp, $zero - ld.d $s8, $sp, 112 # 8-byte Folded Reload + move $s8, $s4 ld.d $s0, $sp, 88 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_2: # =>This Inner Loop Header: Depth=1 ld.d $a0, $s0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - bne $a0, $s4, .LBB1_255 + bne $a0, $s5, .LBB1_255 # %bb.3: # in Loop: Header=BB1_2 Depth=1 addi.w $fp, $fp, 1 addi.d $s8, $s8, -1 addi.d $s0, $s0, 8 bnez $s8, .LBB1_2 .LBB1_4: # %.preheader188 + st.d $s4, $sp, 120 # 8-byte Folded Spill ld.d $a0, $sp, 104 # 8-byte Folded Reload addi.w $fp, $a0, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload blez $a0, .LBB1_8 # %bb.5: # %.lr.ph195 move $s8, $zero - ld.d $s0, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 80 # 8-byte Folded Reload + ld.d $s0, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 80 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_6: # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s3, 0 + ld.d $a0, $s4, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 bne $a0, $fp, .LBB1_256 # %bb.7: # in Loop: Header=BB1_6 Depth=1 addi.w $s8, $s8, 1 addi.d $s0, $s0, -1 - addi.d $s3, $s3, 8 + addi.d $s4, $s4, 8 bnez $s0, .LBB1_6 .LBB1_8: # %._crit_edge ld.d $s8, $sp, 584 - movgr2fr.w $fa0, $s6 + movgr2fr.w $fa0, $s2 ffint.s.w $fs0, $fa0 ld.d $s0, $sp, 88 # 8-byte Folded Reload move $a0, $s0 move $a1, $s7 - move $a2, $s5 + move $a2, $s6 ld.d $a3, $sp, 384 # 8-byte Folded Reload - move $a4, $s2 - move $a5, $s4 - ld.d $s5, $sp, 112 # 8-byte Folded Reload - move $a6, $s5 + move $a4, $s3 + move $a5, $s5 + ld.d $s4, $sp, 120 # 8-byte Folded Reload + move $a6, $s4 pcaddu18i $ra, %call36(cpmx_ribosum) jirl $ra, $ra, 0 - ld.d $s6, $sp, 80 # 8-byte Folded Reload - move $a0, $s6 + ld.d $s7, $sp, 80 # 8-byte Folded Reload + move $a0, $s7 ld.d $a1, $sp, 448 # 8-byte Folded Reload ld.d $a2, $sp, 456 # 8-byte Folded Reload ld.d $a3, $sp, 376 # 8-byte Folded Reload move $a4, $s1 move $a5, $fp - move $s3, $s1 - ld.d $s1, $sp, 120 # 8-byte Folded Reload - move $a6, $s1 + ld.d $s6, $sp, 112 # 8-byte Folded Reload + move $a6, $s6 pcaddu18i $ra, %call36(cpmx_ribosum) jirl $ra, $ra, 0 beqz $s8, .LBB1_10 # %bb.9: - ld.d $a0, $sp, 608 - st.d $a0, $sp, 456 # 8-byte Folded Spill - move $s7, $s4 - ld.d $s4, $sp, 592 + ld.d $s4, $sp, 608 + move $s2, $s5 + ld.d $s5, $sp, 592 ld.d $a0, $sp, 232 # 8-byte Folded Reload - move $a1, $s5 + ld.d $a1, $sp, 120 # 8-byte Folded Reload move $a2, $s0 - move $a3, $s2 - move $a4, $s7 + move $a3, $s3 + move $a4, $s2 move $a5, $s8 pcaddu18i $ra, %call36(new_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 152 # 8-byte Folded Reload - move $a1, $s1 - move $a2, $s6 - move $a3, $s3 + move $a1, $s6 + move $a2, $s7 + move $a3, $s1 move $a4, $fp - move $a5, $s4 + move $a5, $s5 pcaddu18i $ra, %call36(new_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 392 # 8-byte Folded Reload - move $a1, $s5 + ld.d $a1, $sp, 120 # 8-byte Folded Reload move $a2, $s0 - move $a3, $s2 - move $a4, $s7 - ld.d $s2, $sp, 456 # 8-byte Folded Reload - move $a5, $s2 + move $a3, $s3 + move $a4, $s2 + move $a5, $s4 pcaddu18i $ra, %call36(new_FinalGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 216 # 8-byte Folded Reload - move $s8, $s1 - move $a1, $s1 - move $a2, $s6 - move $a3, $s3 + move $a1, $s6 + move $a2, $s7 + move $a3, $s1 move $a4, $fp - move $a5, $s2 + move $a5, $s4 + ld.d $s4, $sp, 120 # 8-byte Folded Reload pcaddu18i $ra, %call36(new_FinalGapCount) jirl $ra, $ra, 0 b .LBB1_11 .LBB1_10: ld.d $a0, $sp, 232 # 8-byte Folded Reload - move $a1, $s5 + move $a1, $s4 move $a2, $s0 - move $a3, $s2 - move $a4, $s4 + move $a3, $s3 + move $a4, $s5 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 152 # 8-byte Folded Reload - move $a1, $s1 - move $a2, $s6 - move $a3, $s3 + move $a1, $s6 + move $a2, $s7 + move $a3, $s1 move $a4, $fp pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 392 # 8-byte Folded Reload - move $a1, $s5 + move $a1, $s4 move $a2, $s0 - move $a3, $s2 - move $a4, $s4 + move $a3, $s3 + move $a4, $s5 pcaddu18i $ra, %call36(st_FinalGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 216 # 8-byte Folded Reload - move $s8, $s1 - move $a1, $s1 - move $a2, $s6 - move $a3, $s3 + move $a1, $s6 + move $a2, $s7 + move $a3, $s1 move $a4, $fp pcaddu18i $ra, %call36(st_FinalGapCount) jirl $ra, $ra, 0 - move $s7, $s4 + move $s2, $s5 .LBB1_11: - move $s4, $s0 + move $s5, $s0 fcvt.d.s $fa0, $fs0 - ld.d $s5, $sp, 56 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload ld.d $a5, $sp, 216 # 8-byte Folded Reload ld.d $s0, $sp, 104 # 8-byte Folded Reload - st.d $s7, $sp, 432 # 8-byte Folded Spill - blez $s7, .LBB1_18 + st.d $s2, $sp, 432 # 8-byte Folded Spill + blez $s2, .LBB1_18 # %bb.12: # %.lr.ph198 ld.d $a0, $sp, 160 # 8-byte Folded Reload bstrpick.d $a0, $a0, 30, 0 ori $a1, $zero, 8 bltu $a0, $a1, .LBB1_15 # %bb.13: # %vector.memcheck - ld.d $a3, $sp, 392 # 8-byte Folded Reload - alsl.d $a1, $a0, $a3, 2 - ld.d $a2, $sp, 232 # 8-byte Folded Reload - bgeu $a2, $a1, .LBB1_68 -# %bb.14: # %vector.memcheck - alsl.d $a1, $a0, $a2, 2 + ld.d $a4, $sp, 392 # 8-byte Folded Reload + alsl.d $a1, $a0, $a4, 2 + ld.d $a3, $sp, 232 # 8-byte Folded Reload bgeu $a3, $a1, .LBB1_68 +# %bb.14: # %vector.memcheck + alsl.d $a1, $a0, $a3, 2 + bgeu $a4, $a1, .LBB1_68 .LBB1_15: move $a1, $zero .LBB1_16: # %scalar.ph.preheader @@ -2962,7 +2898,6 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout addi.d $a3, $a3, 4 bnez $a0, .LBB1_17 .LBB1_18: # %.preheader187 - ld.d $s7, $sp, 112 # 8-byte Folded Reload blez $fp, .LBB1_25 # %bb.19: # %.lr.ph200 bstrpick.d $a0, $s0, 30, 0 @@ -2970,10 +2905,10 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout bltu $a0, $a1, .LBB1_22 # %bb.20: # %vector.memcheck262 alsl.d $a1, $a0, $a5, 2 - ld.d $a2, $sp, 152 # 8-byte Folded Reload - bgeu $a2, $a1, .LBB1_71 + ld.d $a3, $sp, 152 # 8-byte Folded Reload + bgeu $a3, $a1, .LBB1_71 # %bb.21: # %vector.memcheck262 - alsl.d $a1, $a0, $a2, 2 + alsl.d $a1, $a0, $a3, 2 bgeu $a5, $a1, .LBB1_71 .LBB1_22: move $a1, $zero @@ -3020,29 +2955,29 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout st.w $a1, $a0, %pc_lo12(reccycle) blez $fp, .LBB1_58 # %bb.26: - move $a0, $s7 + move $a0, $s4 move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 move $fp, $a0 - move $a0, $s8 + move $a0, $s6 move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - blez $s7, .LBB1_34 + blez $s4, .LBB1_34 # %bb.27: # %.lr.ph.preheader.i ori $a2, $zero, 8 move $a1, $zero - bltu $s7, $a2, .LBB1_32 + bltu $s4, $a2, .LBB1_32 # %bb.28: # %.lr.ph.preheader.i - sub.d $a2, $fp, $s5 + sub.d $a2, $fp, $s8 ori $a3, $zero, 64 bltu $a2, $a3, .LBB1_32 # %bb.29: # %vector.ph286 - bstrpick.d $a1, $s7, 30, 3 + bstrpick.d $a1, $s4, 30, 3 slli.d $a1, $a1, 3 addi.d $a2, $fp, 32 - addi.d $a3, $s5, 32 + addi.d $a3, $s8, 32 move $a4, $a1 .p2align 4, , 16 .LBB1_30: # %vector.body289 @@ -3056,11 +2991,11 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout addi.d $a3, $a3, 64 bnez $a4, .LBB1_30 # %bb.31: # %middle.block294 - beq $a1, $s7, .LBB1_34 + beq $a1, $s4, .LBB1_34 .LBB1_32: # %.lr.ph.i.preheader - sub.d $a2, $s7, $a1 + sub.d $a2, $s4, $a1 alsl.d $a3, $a1, $fp, 3 - alsl.d $a1, $a1, $s5, 3 + alsl.d $a1, $a1, $s8, 3 .p2align 4, , 16 .LBB1_33: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -3071,18 +3006,18 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout addi.d $a1, $a1, 8 bnez $a2, .LBB1_33 .LBB1_34: # %.preheader13.i - blez $s8, .LBB1_42 + blez $s6, .LBB1_42 # %bb.35: # %.lr.ph17.preheader.i ori $a2, $zero, 8 move $a1, $zero - bltu $s8, $a2, .LBB1_40 + bltu $s6, $a2, .LBB1_40 # %bb.36: # %.lr.ph17.preheader.i ld.d $a2, $sp, 224 # 8-byte Folded Reload sub.d $a2, $a0, $a2 ori $a3, $zero, 64 bltu $a2, $a3, .LBB1_40 # %bb.37: # %vector.ph301 - bstrpick.d $a1, $s8, 30, 3 + bstrpick.d $a1, $s6, 30, 3 slli.d $a1, $a1, 3 addi.d $a2, $a0, 32 ld.d $a3, $sp, 224 # 8-byte Folded Reload @@ -3100,9 +3035,9 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout addi.d $a3, $a3, 64 bnez $a4, .LBB1_38 # %bb.39: # %middle.block309 - beq $a1, $s8, .LBB1_42 + beq $a1, $s6, .LBB1_42 .LBB1_40: # %.lr.ph17.i.preheader - sub.d $a2, $s8, $a1 + sub.d $a2, $s6, $a1 alsl.d $a3, $a1, $a0, 3 ld.d $a4, $sp, 224 # 8-byte Folded Reload alsl.d $a1, $a1, $a4, 3 @@ -3124,10 +3059,10 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout ori $a1, $zero, 9 bgeu $a1, $a2, .LBB1_67 # %bb.44: # %iter.check - ld.d $a0, $sp, 160 # 8-byte Folded Reload - addi.w $s2, $a0, 100 + ld.d $s6, $sp, 160 # 8-byte Folded Reload move $s8, $a2 - addi.w $s3, $s0, 100 + addi.w $s3, $s6, 100 + addi.w $s7, $s0, 100 ld.d $s0, $sp, 440 # 8-byte Folded Reload move $a0, $s0 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -3190,32 +3125,32 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 st.d $a0, $sp, 448 # 8-byte Folded Spill - sltu $a0, $s3, $s2 - masknez $a1, $s3, $a0 - maskeqz $a0, $s2, $a0 + sltu $a0, $s7, $s3 + masknez $a1, $s7, $a0 + maskeqz $a0, $s3, $a0 or $a0, $a0, $a1 - addi.w $s7, $a0, 2 - move $a0, $s7 + addi.w $s2, $a0, 2 + move $a0, $s2 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 ori $a1, $zero, 26 - move $a0, $s7 + move $a0, $s2 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 move $s5, $a0 ori $a1, $zero, 26 - move $a0, $s7 + move $a0, $s2 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - move $s6, $a0 - move $a0, $s2 - move $a1, $s3 + move $s2, $a0 + move $a0, $s3 + move $a1, $s7 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 st.d $a0, $sp, 368 # 8-byte Folded Spill - move $a0, $s2 - move $a1, $s3 + move $a0, $s3 move $s3, $fp + move $a1, $s7 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 st.d $a0, $sp, 320 # 8-byte Folded Spill @@ -3229,7 +3164,7 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout move $a3, $zero ld.d $a4, $sp, 432 # 8-byte Folded Reload move $a5, $s5 - move $a6, $s6 + move $a6, $s2 pcaddu18i $ra, %call36(match_ribosum) jirl $ra, $ra, 0 ori $a7, $zero, 1 @@ -3241,12 +3176,11 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout move $a4, $s8 st.d $s5, $sp, 336 # 8-byte Folded Spill move $a5, $s5 - st.d $s6, $sp, 328 # 8-byte Folded Spill - move $a6, $s6 + st.d $s2, $sp, 328 # 8-byte Folded Spill + move $a6, $s2 pcaddu18i $ra, %call36(match_ribosum) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload - addi.w $a1, $a0, 1 + addi.w $a1, $s6, 1 ori $a0, $zero, 2 slt $a2, $a0, $a1 masknez $a0, $a0, $a2 @@ -3350,11 +3284,11 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout b .LBB1_86 .LBB1_58: # %.preheader1.i ld.d $s2, $sp, 432 # 8-byte Folded Reload - blez $s7, .LBB1_61 + blez $s4, .LBB1_61 # %bb.59: # %.lr.ph114.i - move $fp, $s5 - move $s0, $s4 - move $s1, $s7 + move $fp, $s8 + move $s0, $s5 + move $s1, $s4 .p2align 4, , 16 .LBB1_60: # =>This Inner Loop Header: Depth=1 ld.d $a0, $fp, 0 @@ -3369,7 +3303,7 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout addi.d $fp, $fp, 8 bnez $s1, .LBB1_60 .LBB1_61: # %.preheader.i - blez $s8, .LBB1_177 + blez $s6, .LBB1_177 # %bb.62: # %.lr.ph121.i move $s0, $zero ori $s1, $zero, 45 @@ -3378,7 +3312,7 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout .LBB1_63: # %._crit_edge119.i # in Loop: Header=BB1_64 Depth=1 addi.d $s0, $s0, 1 - beq $s0, $s8, .LBB1_177 + beq $s0, $s6, .LBB1_177 .LBB1_64: # =>This Loop Header: Depth=1 # Child Loop BB1_66 Depth 2 slli.d $a0, $s0, 3 @@ -3417,121 +3351,83 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout ld.d $a1, $sp, 160 # 8-byte Folded Reload bstrpick.d $a1, $a1, 30, 3 slli.d $a1, $a1, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a2, %pc_hi20(.LCPI1_0) + xvld $xr1, $a2, %pc_lo12(.LCPI1_0) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 + move $a2, $a3 + move $a3, $a4 move $a4, $a1 .p2align 4, , 16 .LBB1_69: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a2, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a2, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a3, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a2, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a3, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a2, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a3, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a3, 0 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 addi.d $a2, $a2, 32 @@ -3542,122 +3438,83 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout .LBB1_71: # %vector.ph270 bstrpick.d $a1, $s0, 30, 3 slli.d $a1, $a1, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a2, %pc_hi20(.LCPI1_0) + xvld $xr1, $a2, %pc_lo12(.LCPI1_0) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 + move $a2, $a3 move $a3, $a5 move $a4, $a1 .p2align 4, , 16 .LBB1_72: # %vector.body275 # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a2, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a2, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a3, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a2, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a3, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a2, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a3, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a3, 0 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 addi.d $a2, $a2, 32 @@ -4549,11 +4406,11 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout ori $a6, $zero, 4 ori $a7, $zero, 64 ori $t0, $zero, 16 - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $s6, $sp, 80 # 8-byte Folded Reload - ld.d $s7, $sp, 112 # 8-byte Folded Reload - ld.d $s4, $sp, 88 # 8-byte Folded Reload - ld.d $s5, $sp, 56 # 8-byte Folded Reload + ld.d $s6, $sp, 112 # 8-byte Folded Reload + ld.d $s7, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 88 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload ld.d $a0, $sp, 40 # 8-byte Folded Reload ld.d $t8, $sp, 16 # 8-byte Folded Reload ld.d $fp, $sp, 368 # 8-byte Folded Reload @@ -4727,51 +4584,51 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout ld.d $a0, $sp, 96 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - move $a0, $s5 + move $a0, $s8 pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 ld.d $a0, $sp, 224 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $s6, 0 + ld.d $a0, $s7, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $fp, $a0 - blez $s7, .LBB1_181 + blez $s4, .LBB1_181 # %bb.178: # %.lr.ph203 - ld.d $a0, $s4, 0 + ld.d $a0, $s5, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $s0, $zero addi.w $s1, $a0, 0 - move $s2, $s7 + move $s2, $s4 .p2align 4, , 16 .LBB1_179: # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s4, 0 + ld.d $a0, $s5, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 bne $a0, $s1, .LBB1_258 # %bb.180: # in Loop: Header=BB1_179 Depth=1 addi.w $s0, $s0, 1 addi.d $s2, $s2, -1 - addi.d $s4, $s4, 8 + addi.d $s5, $s5, 8 bnez $s2, .LBB1_179 .LBB1_181: # %.preheader - blez $s8, .LBB1_185 + blez $s6, .LBB1_185 # %bb.182: # %.lr.ph205 move $s0, $zero addi.w $fp, $fp, 0 - move $s1, $s8 + move $s1, $s6 .p2align 4, , 16 .LBB1_183: # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s6, 0 + ld.d $a0, $s7, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 bne $a0, $fp, .LBB1_259 # %bb.184: # in Loop: Header=BB1_183 Depth=1 addi.w $s0, $s0, 1 addi.d $s1, $s1, -1 - addi.d $s6, $s6, 8 + addi.d $s7, $s7, 8 bnez $s1, .LBB1_183 .LBB1_185: # %._crit_edge206 movgr2fr.w $fa0, $zero @@ -5302,7 +5159,7 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) move $a2, $fp - ld.d $a3, $sp, 112 # 8-byte Folded Reload + move $a3, $s4 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a3, $s0, 0 @@ -5314,7 +5171,7 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) move $a2, $s8 - ld.d $a3, $sp, 120 # 8-byte Folded Reload + ld.d $a3, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a3, $fp, 0 @@ -5335,7 +5192,7 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) move $a2, $s0 - move $a3, $s7 + move $a3, $s4 b .LBB1_260 .LBB1_259: pcalau12i $a0, %got_pc_hi20(stderr) @@ -5344,7 +5201,7 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) move $a2, $s0 - move $a3, $s8 + move $a3, $s6 .LBB1_260: pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s index ae6b103..617e31a 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s @@ -1,6 +1,17 @@ .file "MSalignmm.c" + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 # -- Begin function MSalignmm +.LCPI0_0: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 .text - .globl MSalignmm # -- Begin function MSalignmm + .globl MSalignmm .p2align 5 .type MSalignmm,@function MSalignmm: # @MSalignmm @@ -18,21 +29,22 @@ MSalignmm: # @MSalignmm st.d $s7, $sp, 208 # 8-byte Folded Spill st.d $s8, $sp, 200 # 8-byte Folded Spill fst.d $fs0, $sp, 192 # 8-byte Folded Spill - st.d $a7, $sp, 72 # 8-byte Folded Spill - move $s0, $a5 - move $s1, $a4 - st.d $a3, $sp, 80 # 8-byte Folded Spill + st.d $a7, $sp, 80 # 8-byte Folded Spill + move $fp, $a5 + move $s0, $a4 + st.d $a3, $sp, 88 # 8-byte Folded Spill move $s5, $a2 move $s6, $a1 move $s2, $a0 pcalau12i $a0, %got_pc_hi20(penalty) ld.d $a0, $a0, %got_pc_lo12(penalty) - ld.w $fp, $a0, 0 + ld.w $a0, $a0, 0 + st.d $a0, $sp, 72 # 8-byte Folded Spill ld.d $a0, $s2, 0 pcaddu18i $ra, %call36(seqlen) jirl $ra, $ra, 0 ld.d $a1, $s6, 0 - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill move $a0, $a1 pcaddu18i $ra, %call36(seqlen) jirl $ra, $ra, 0 @@ -42,84 +54,84 @@ MSalignmm: # @MSalignmm pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 ld.d $a1, $s6, 0 - move $s8, $a0 + move $s4, $a0 move $a0, $a1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $s7, $a0 - add.d $a0, $s8, $a0 + add.d $a0, $s4, $a0 addi.w $s3, $a0, 200 - move $a0, $s1 + move $a0, $s0 move $a1, $s3 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - st.d $a0, $sp, 104 # 8-byte Folded Spill - move $s4, $s0 - move $a0, $s0 + st.d $a0, $sp, 112 # 8-byte Folded Spill + move $s1, $fp + move $a0, $fp move $a1, $s3 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 104 # 8-byte Folded Spill ori $a0, $zero, 4 move $a1, $zero pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - st.d $a0, $sp, 88 # 8-byte Folded Spill - addi.w $s3, $s8, 102 + st.d $a0, $sp, 96 # 8-byte Folded Spill + addi.w $s3, $s4, 102 move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 176 # 8-byte Folded Spill - st.d $s7, $sp, 136 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $s7, $sp, 144 # 8-byte Folded Spill addi.w $s7, $s7, 102 move $a0, $s7 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill move $a0, $s7 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill ori $a1, $zero, 27 move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill ori $a1, $zero, 27 move $a0, $s7 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - st.d $a0, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 120 # 8-byte Folded Spill - addi.w $s8, $s8, 0 - st.d $s1, $sp, 184 # 8-byte Folded Spill - blez $s1, .LBB0_4 + st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $s4, $sp, 128 # 8-byte Folded Spill + addi.w $s8, $s4, 0 + move $s4, $s0 + blez $s0, .LBB0_4 # %bb.1: # %.lr.ph move $s3, $zero - ld.d $s7, $sp, 184 # 8-byte Folded Reload - move $s1, $s2 + move $s7, $s4 + move $fp, $s2 .p2align 4, , 16 .LBB0_2: # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s1, 0 + ld.d $a0, $fp, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 bne $a0, $s8, .LBB0_48 # %bb.3: # in Loop: Header=BB0_2 Depth=1 addi.w $s3, $s3, 1 addi.d $s7, $s7, -1 - addi.d $s1, $s1, 8 + addi.d $fp, $fp, 8 bnez $s7, .LBB0_2 .LBB0_4: # %.preheader209 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload addi.w $s3, $a0, 0 - blez $s4, .LBB0_8 + blez $s1, .LBB0_8 # %bb.5: # %.lr.ph216 move $s7, $zero - move $s1, $s4 + move $fp, $s1 move $s0, $s6 .p2align 4, , 16 .LBB0_6: # =>This Inner Loop Header: Depth=1 @@ -129,108 +141,109 @@ MSalignmm: # @MSalignmm bne $a0, $s3, .LBB0_49 # %bb.7: # in Loop: Header=BB0_6 Depth=1 addi.w $s7, $s7, 1 - addi.d $s1, $s1, -1 + addi.d $fp, $fp, -1 addi.d $s0, $s0, 8 - bnez $s1, .LBB0_6 + bnez $fp, .LBB0_6 .LBB0_8: # %._crit_edge - movgr2fr.w $fa0, $fp + ld.d $a0, $sp, 72 # 8-byte Folded Reload + movgr2fr.w $fa0, $a0 ffint.s.w $fs0, $fa0 move $a0, $s2 - ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 160 # 8-byte Folded Reload move $a2, $s5 move $a3, $s8 - ld.d $fp, $sp, 184 # 8-byte Folded Reload - move $a4, $fp + move $a4, $s4 pcaddu18i $ra, %call36(MScpmx_calc_new) jirl $ra, $ra, 0 move $a0, $s6 - ld.d $a1, $sp, 144 # 8-byte Folded Reload - ld.d $s1, $sp, 80 # 8-byte Folded Reload - move $a2, $s1 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $s0, $sp, 88 # 8-byte Folded Reload + move $a2, $s0 move $a3, $s3 - move $s7, $s4 - move $a4, $s4 + move $s7, $s1 + move $a4, $s1 pcaddu18i $ra, %call36(MScpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a5, $sp, 72 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload beqz $a5, .LBB0_10 # %bb.9: ld.d $s7, $sp, 304 - ld.d $s0, $sp, 288 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - move $a1, $fp + ld.d $fp, $sp, 288 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + move $a1, $s4 move $a2, $s2 move $a3, $s5 move $a4, $s8 pcaddu18i $ra, %call36(new_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - move $a1, $s4 + ld.d $a0, $sp, 136 # 8-byte Folded Reload + move $a1, $s1 move $a2, $s6 - move $a3, $s1 + move $a3, $s0 move $a4, $s3 - move $a5, $s0 - move $s0, $a0 + move $a5, $fp + move $fp, $a0 pcaddu18i $ra, %call36(new_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - move $a1, $fp + ld.d $a0, $sp, 176 # 8-byte Folded Reload + move $a1, $s4 move $a2, $s2 move $a3, $s5 move $a4, $s8 move $a5, $s7 pcaddu18i $ra, %call36(new_FinalGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload - move $a1, $s4 + ld.d $a0, $sp, 168 # 8-byte Folded Reload + move $a1, $s1 move $a2, $s6 - move $a3, $s1 + move $a3, $s0 move $a4, $s3 move $a5, $s7 - move $s7, $s4 + move $s7, $s1 pcaddu18i $ra, %call36(new_FinalGapCount) jirl $ra, $ra, 0 b .LBB0_11 .LBB0_10: - ld.d $a0, $sp, 176 # 8-byte Folded Reload - move $a1, $fp + ld.d $a0, $sp, 184 # 8-byte Folded Reload + move $a1, $s4 move $a2, $s2 move $a3, $s5 move $a4, $s8 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - move $s0, $a0 + ld.d $fp, $sp, 136 # 8-byte Folded Reload + move $a0, $fp move $a1, $s7 move $a2, $s6 - move $a3, $s1 + move $a3, $s0 move $a4, $s3 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - move $a1, $fp + ld.d $a0, $sp, 176 # 8-byte Folded Reload + move $a1, $s4 move $a2, $s2 move $a3, $s5 move $a4, $s8 pcaddu18i $ra, %call36(st_FinalGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload move $a1, $s7 move $a2, $s6 - move $a3, $s1 + move $a3, $s0 move $a4, $s3 pcaddu18i $ra, %call36(st_FinalGapCount) jirl $ra, $ra, 0 .LBB0_11: - ld.d $s5, $sp, 112 # 8-byte Folded Reload + move $s1, $s4 fcvt.d.s $fa0, $fs0 - ld.d $s4, $sp, 96 # 8-byte Folded Reload - ld.d $a5, $sp, 168 # 8-byte Folded Reload - ld.d $a6, $sp, 160 # 8-byte Folded Reload - ld.d $a7, $sp, 176 # 8-byte Folded Reload + ld.d $s4, $sp, 112 # 8-byte Folded Reload + ld.d $s5, $sp, 104 # 8-byte Folded Reload + ld.d $a5, $sp, 176 # 8-byte Folded Reload + ld.d $a6, $sp, 168 # 8-byte Folded Reload + ld.d $a7, $sp, 184 # 8-byte Folded Reload blez $s8, .LBB0_18 # %bb.12: # %.lr.ph219 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload bstrpick.d $a0, $a0, 30, 0 ori $a1, $zero, 8 bltu $a0, $a1, .LBB0_15 @@ -270,24 +283,24 @@ MSalignmm: # @MSalignmm addi.d $a3, $a3, 4 bnez $a0, .LBB0_17 .LBB0_18: # %.preheader208 - ld.d $s8, $sp, 88 # 8-byte Folded Reload + ld.d $s8, $sp, 96 # 8-byte Folded Reload blez $s3, .LBB0_25 # %bb.19: # %.lr.ph221 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload bstrpick.d $a0, $a0, 30, 0 ori $a1, $zero, 8 bltu $a0, $a1, .LBB0_22 # %bb.20: # %vector.memcheck285 alsl.d $a1, $a0, $a6, 2 - bgeu $s0, $a1, .LBB0_45 + bgeu $fp, $a1, .LBB0_45 # %bb.21: # %vector.memcheck285 - alsl.d $a1, $a0, $s0, 2 + alsl.d $a1, $a0, $fp, 2 bgeu $a6, $a1, .LBB0_45 .LBB0_22: move $a1, $zero .LBB0_23: # %scalar.ph291.preheader alsl.d $a2, $a1, $a6, 2 - alsl.d $a3, $a1, $s0, 2 + alsl.d $a3, $a1, $fp, 2 sub.d $a0, $a0, $a1 vldi $vr1, -912 vldi $vr2, -928 @@ -315,43 +328,41 @@ MSalignmm: # @MSalignmm .LBB0_25: # %._crit_edge222 st.d $a7, $s8, 0 st.d $a5, $s8, 8 - st.d $s0, $s8, 16 + st.d $fp, $s8, 16 st.d $a6, $s8, 24 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload addi.w $a7, $a0, -1 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload addi.w $a0, $a0, -1 st.d $s8, $sp, 40 st.d $zero, $sp, 32 - st.d $s4, $sp, 24 - ld.d $s3, $sp, 104 # 8-byte Folded Reload - st.d $s3, $sp, 16 + st.d $s5, $sp, 24 + st.d $s4, $sp, 16 st.d $a0, $sp, 8 st.d $zero, $sp, 0 - ld.d $fp, $sp, 184 # 8-byte Folded Reload - move $a0, $fp + move $a0, $s1 move $a1, $s7 move $a2, $s2 move $a3, $s6 - ld.d $a4, $sp, 152 # 8-byte Folded Reload - ld.d $a5, $sp, 144 # 8-byte Folded Reload + ld.d $a4, $sp, 160 # 8-byte Folded Reload + ld.d $a5, $sp, 152 # 8-byte Folded Reload move $a6, $zero pcaddu18i $ra, %call36(MSalignmm_rec) jirl $ra, $ra, 0 fmov.s $fs0, $fa0 - blez $fp, .LBB0_28 + blez $s1, .LBB0_28 # %bb.26: # %.lr.ph225.preheader - ld.d $fp, $sp, 184 # 8-byte Folded Reload + move $fp, $s1 move $s0, $s2 - move $s1, $s3 + move $s3, $s4 .p2align 4, , 16 .LBB0_27: # %.lr.ph225 # =>This Inner Loop Header: Depth=1 ld.d $a0, $s0, 0 - ld.d $a1, $s1, 0 + ld.d $a1, $s3, 0 pcaddu18i $ra, %call36(strcpy) jirl $ra, $ra, 0 - addi.d $s1, $s1, 8 + addi.d $s3, $s3, 8 addi.d $fp, $fp, -1 addi.d $s0, $s0, 8 bnez $fp, .LBB0_27 @@ -360,15 +371,15 @@ MSalignmm: # @MSalignmm # %bb.29: # %.lr.ph227.preheader move $fp, $s7 move $s0, $s6 - move $s1, $s4 + move $s3, $s5 .p2align 4, , 16 .LBB0_30: # %.lr.ph227 # =>This Inner Loop Header: Depth=1 ld.d $a0, $s0, 0 - ld.d $a1, $s1, 0 + ld.d $a1, $s3, 0 pcaddu18i $ra, %call36(strcpy) jirl $ra, $ra, 0 - addi.d $s1, $s1, 8 + addi.d $s3, $s3, 8 addi.d $fp, $fp, -1 addi.d $s0, $s0, 8 bnez $fp, .LBB0_30 @@ -376,41 +387,41 @@ MSalignmm: # @MSalignmm ld.d $a0, $s2, 0 pcaddu18i $ra, %call36(seqlen) jirl $ra, $ra, 0 - bne $a0, $s5, .LBB0_54 + ld.d $s3, $sp, 120 # 8-byte Folded Reload + bne $a0, $s3, .LBB0_54 # %bb.32: ld.d $a0, $s6, 0 pcaddu18i $ra, %call36(seqlen) jirl $ra, $ra, 0 - ld.d $s1, $sp, 184 # 8-byte Folded Reload ld.d $s0, $sp, 64 # 8-byte Folded Reload - ld.d $fp, $sp, 128 # 8-byte Folded Reload + ld.d $fp, $sp, 136 # 8-byte Folded Reload bne $a0, $s0, .LBB0_55 # %bb.33: - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 move $a0, $fp pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 move $a0, $s8 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - move $a0, $s3 + move $a0, $s4 pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - move $a0, $s4 + move $a0, $s5 pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 ld.d $a0, $s6, 0 @@ -470,126 +481,86 @@ MSalignmm: # @MSalignmm addi.d $sp, $sp, 288 ret .LBB0_42: # %vector.ph - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload bstrpick.d $a1, $a1, 30, 3 slli.d $a1, $a1, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a2, %pc_hi20(.LCPI0_0) + xvld $xr1, $a2, %pc_lo12(.LCPI0_0) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 move $a2, $a7 move $a3, $a5 move $a4, $a1 .p2align 4, , 16 .LBB0_43: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a2, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a2, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a3, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a2, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a3, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a2, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a3, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a3, 0 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 addi.d $a2, $a2, 32 @@ -598,126 +569,86 @@ MSalignmm: # @MSalignmm beq $a0, $a1, .LBB0_18 b .LBB0_16 .LBB0_45: # %vector.ph293 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 144 # 8-byte Folded Reload bstrpick.d $a1, $a1, 30, 3 slli.d $a1, $a1, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 - move $a2, $s0 + pcalau12i $a2, %pc_hi20(.LCPI0_0) + xvld $xr1, $a2, %pc_lo12(.LCPI0_0) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 + move $a2, $fp move $a3, $a6 move $a4, $a1 .p2align 4, , 16 .LBB0_46: # %vector.body298 # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a2, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a2, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a3, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a2, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a3, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a2, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a3, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a3, 0 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 addi.d $a2, $a2, 32 @@ -732,7 +663,7 @@ MSalignmm: # @MSalignmm pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) move $a2, $s3 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + move $a3, $s4 b .LBB0_50 .LBB0_49: pcalau12i $a0, %got_pc_hi20(stderr) @@ -741,7 +672,7 @@ MSalignmm: # @MSalignmm pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) move $a2, $s7 - move $a3, $s4 + move $a3, $s1 .LBB0_50: pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 @@ -796,7 +727,7 @@ MSalignmm: # @MSalignmm pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $a1, $a0, %pc_lo12(.L.str.3) move $a0, $fp - move $a3, $s5 + move $a3, $s3 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $s0, 0 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s index bdbee63..96a6dca 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s @@ -458,6 +458,15 @@ imp_match_init_strictQ: # @imp_match_init_strictQ .section .rodata.cst32,"aM",@progbits,32 .p2align 5, 0x0 .LCPI3_1: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI3_2: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 @@ -466,17 +475,17 @@ imp_match_init_strictQ: # @imp_match_init_strictQ .word 6 # 0x6 .word 7 # 0x7 .word 8 # 0x8 -.LCPI3_2: +.LCPI3_3: .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 .dword 8 # 0x8 -.LCPI3_3: +.LCPI3_4: .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI3_6: +.LCPI3_7: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -487,15 +496,15 @@ imp_match_init_strictQ: # @imp_match_init_strictQ .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI3_4: +.LCPI3_5: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI3_5: +.LCPI3_6: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI3_7: +.LCPI3_8: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -537,7 +546,7 @@ Q__align: # @Q__align pcalau12i $a0, %pc_hi20(Q__align.mseq1) st.d $a0, $sp, 184 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.mseq2) - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill bnez $s6, .LBB3_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) @@ -554,7 +563,7 @@ Q__align: # @Q__align pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 ld.w $s6, $s4, %pc_lo12(Q__align.orlgth1) - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.mseq2) .LBB3_2: ld.d $a0, $s1, 0 @@ -590,17 +599,17 @@ Q__align: # @Q__align pcalau12i $a0, %pc_hi20(Q__align.digf2) st.d $a0, $sp, 432 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.diaf1) - st.d $a0, $sp, 112 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(Q__align.diaf2) st.d $a0, $sp, 120 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(Q__align.diaf2) + st.d $a0, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapz1) st.d $a0, $sp, 456 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapz2) st.d $a0, $sp, 440 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapf1) - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapf2) - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.ogcp1g) st.d $a0, $sp, 464 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.ogcp2g) @@ -640,16 +649,16 @@ Q__align: # @Q__align st.d $s5, $sp, 208 # 8-byte Folded Spill st.d $s3, $sp, 216 # 8-byte Folded Spill st.d $s1, $sp, 200 # 8-byte Folded Spill - st.d $s2, $sp, 136 # 8-byte Folded Spill - st.d $fp, $sp, 144 # 8-byte Folded Spill + st.d $fp, $sp, 160 # 8-byte Folded Spill st.d $a5, $sp, 416 # 8-byte Folded Spill st.d $s4, $sp, 56 # 8-byte Folded Spill + st.d $s2, $sp, 112 # 8-byte Folded Spill blt $s6, $fp, .LBB3_5 # %bb.3: blt $s0, $a5, .LBB3_5 # %bb.4: ld.d $fp, $sp, 368 # 8-byte Folded Reload - ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload bgtz $s3, .LBB3_9 b .LBB3_11 .LBB3_5: @@ -697,11 +706,11 @@ Q__align: # @Q__align ld.d $a0, $a0, %pc_lo12(Q__align.digf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -713,11 +722,11 @@ Q__align: # @Q__align ld.d $a0, $a0, %pc_lo12(Q__align.gapz2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -877,12 +886,12 @@ Q__align: # @Q__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.diaf1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.diaf2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -897,12 +906,12 @@ Q__align: # @Q__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 136 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.gapf1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 144 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.gapf2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -1011,7 +1020,7 @@ Q__align: # @Q__align ld.d $s3, $sp, 216 # 8-byte Folded Reload ld.d $s1, $sp, 200 # 8-byte Folded Reload ld.d $a5, $sp, 416 # 8-byte Folded Reload - ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload blez $s3, .LBB3_11 .LBB3_9: # %.lr.ph ld.d $a0, $fp, %pc_lo12(Q__align.mseq) @@ -1033,7 +1042,7 @@ Q__align: # @Q__align blez $s7, .LBB3_14 # %bb.12: # %.lr.ph568 ld.d $a1, $fp, %pc_lo12(Q__align.mseq) - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.mseq2) alsl.d $a1, $s3, $a1, 3 move $a2, $s7 @@ -1101,7 +1110,7 @@ Q__align: # @Q__align st.d $a0, $a1, 0 st.w $s2, $fp, 0 st.w $s0, $s1, 0 - ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload .LBB3_21: ld.d $a1, $sp, 360 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.cpmx1) @@ -1192,7 +1201,7 @@ Q__align: # @Q__align move $a6, $s5 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf1) ld.d $a1, $sp, 216 # 8-byte Folded Reload move $a2, $s0 @@ -1204,7 +1213,7 @@ Q__align: # @Q__align ld.d $fp, $sp, 216 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf2) move $a1, $s7 ld.d $a2, $sp, 208 # 8-byte Folded Reload @@ -1215,7 +1224,7 @@ Q__align: # @Q__align ld.d $s5, $sp, 208 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf1) move $a1, $fp move $a2, $s0 @@ -1223,7 +1232,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf2) move $a1, $s7 move $a2, $s5 @@ -1297,7 +1306,7 @@ Q__align: # @Q__align move $a4, $s1 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf1) move $a1, $s3 move $a2, $s0 @@ -1305,7 +1314,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf2) move $a1, $s7 move $a2, $s5 @@ -1313,7 +1322,7 @@ Q__align: # @Q__align move $a4, $s1 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf1) move $a1, $s3 move $a2, $s0 @@ -1321,7 +1330,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf2) move $a1, $s7 move $a2, $s5 @@ -1346,11 +1355,11 @@ Q__align: # @Q__align pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 .LBB3_24: - ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s4, $sp, 112 # 8-byte Folded Reload ld.d $s6, $sp, 240 # 8-byte Folded Reload addi.w $s8, $zero, -1 - xvld $xr19, $sp, 368 # 32-byte Folded Reload - fcvt.d.s $fa0, $ft11 + xvld $xr18, $sp, 368 # 32-byte Folded Reload + fcvt.d.s $fa0, $ft10 move $ra, $s1 blt $s1, $s8, .LBB3_29 # %bb.25: # %.lr.ph571 @@ -1591,7 +1600,7 @@ Q__align: # @Q__align jirl $ra, $ra, 0 .LBB3_38: # %clearvec.exit ld.d $ra, $sp, 416 # 8-byte Folded Reload - xvld $xr19, $sp, 368 # 32-byte Folded Reload + xvld $xr18, $sp, 368 # 32-byte Folded Reload ld.d $a0, $sp, 80 # 8-byte Folded Reload move $t3, $s0 move $t4, $s1 @@ -1733,7 +1742,7 @@ Q__align: # @Q__align sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload bnez $s0, .LBB3_27 # %bb.65: # %vector.memcheck sltu $s0, $a6, $t4 @@ -1784,239 +1793,156 @@ Q__align: # @Q__align bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 - xvldi $xr4, -1424 + pcalau12i $t3, %pc_hi20(.LCPI3_1) + xvld $xr2, $t3, %pc_lo12(.LCPI3_1) + xvldi $xr3, -912 + xvldi $xr4, -928 + xvldi $xr5, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 move $t6, $a3 move $t7, $a4 move $t8, $a5 - move $s0, $a6 - move $s3, $a7 - move $s4, $t0 - move $s5, $t2 + move $fp, $a6 + move $s0, $a7 + move $s1, $t0 + move $s2, $t2 .p2align 4, , 16 .LBB3_75: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr6, $t3, 0 - xvpermi.q $xr5, $xr6, 1 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 + xvld $xr7, $t3, 0 + xvpermi.q $xr6, $xr7, 1 + vreplvei.w $vr8, $vr6, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 - fcvt.d.s $fa7, $fa7 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvld $xr11, $t4, 0 - xvpermi.q $xr6, $xr8, 2 - xvfsub.d $xr9, $xr2, $xr5 - xvfsub.d $xr10, $xr2, $xr6 - xvpermi.q $xr7, $xr11, 1 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr7, 2 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 vextrins.d $vr7, $vr8, 16 - xvpermi.q $xr7, $xr12, 2 - vreplvei.w $vr8, $vr11, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr11, 2 + xvld $xr11, $t4, 0 + xvpermi.q $xr7, $xr9, 2 + xvfsub.d $xr8, $xr3, $xr6 + xvfsub.d $xr9, $xr3, $xr7 + xvpermi.q $xr10, $xr11, 1 + vreplvei.w $vr12, $vr10, 3 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 - vreplvei.w $vr8, $vr11, 1 - fcvt.d.s $ft5, $ft0 - vreplvei.w $vr8, $vr11, 0 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr13, 16 - xvpermi.q $xr8, $xr12, 2 - xvfsub.d $xr11, $xr9, $xr7 - xvfsub.d $xr12, $xr10, $xr8 - xvfmul.d $xr11, $xr11, $xr1 - xvfmul.d $xr12, $xr12, $xr1 - xvfmul.d $xr11, $xr11, $xr3 - xvfmul.d $xr12, $xr12, $xr3 - xvpickve.d $xr13, $xr11, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr11, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr11, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr11, $xr11, 3 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr14, $vr11, 48 - xvpickve.d $xr11, $xr12, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr13, $xr12, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr11, 16 - xvpickve.d $xr11, $xr12, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 32 - xvpickve.d $xr11, $xr12, 3 - fcvt.s.d $ft3, $ft3 - xvld $xr12, $t6, 0 - vextrins.w $vr13, $vr11, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t5, 0 - xvpermi.q $xr11, $xr12, 1 - vreplvei.w $vr13, $vr11, 3 + vreplvei.w $vr13, $vr10, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr11, 2 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr11, 1 - fcvt.d.s $ft7, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr10, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr12, 16 + xvpermi.q $xr10, $xr13, 2 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 vreplvei.w $vr11, $vr11, 0 - fcvt.d.s $ft5, $ft3 - vextrins.d $vr13, $vr15, 16 - xvpermi.q $xr13, $xr14, 2 - xvfsub.d $xr11, $xr2, $xr13 - xvfsub.d $xr14, $xr11, $xr7 - vreplvei.w $vr15, $vr12, 3 - fcvt.d.s $ft7, $ft7 - vreplvei.w $vr16, $vr12, 2 - fcvt.d.s $ft8, $ft8 - vextrins.d $vr16, $vr15, 16 - vreplvei.w $vr15, $vr12, 1 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfsub.d $xr12, $xr8, $xr10 + xvfsub.d $xr13, $xr9, $xr11 + xvfmul.d $xr12, $xr12, $xr1 + xvfmul.d $xr13, $xr13, $xr1 + xvfmul.d $xr12, $xr12, $xr4 + xvfmul.d $xr13, $xr13, $xr4 + xvld $xr14, $t6, 0 + xvfcvt.s.d $xr12, $xr12, $xr13 + xvperm.w $xr12, $xr12, $xr2 + xvst $xr12, $t5, 0 + xvpermi.q $xr12, $xr14, 1 + vreplvei.w $vr13, $vr12, 3 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr15, $vr12, 2 fcvt.d.s $ft7, $ft7 + vextrins.d $vr15, $vr13, 16 + vreplvei.w $vr13, $vr12, 1 + fcvt.d.s $ft5, $ft5 vreplvei.w $vr12, $vr12, 0 - fcvt.d.s $ft9, $ft4 - vextrins.d $vr17, $vr15, 16 - xvpermi.q $xr17, $xr16, 2 - xvfsub.d $xr12, $xr2, $xr17 - xvfsub.d $xr15, $xr12, $xr8 - xvfmul.d $xr14, $xr14, $xr1 + fcvt.d.s $ft4, $ft4 + vextrins.d $vr12, $vr13, 16 + xvpermi.q $xr12, $xr15, 2 + xvfsub.d $xr13, $xr3, $xr12 + xvfsub.d $xr15, $xr13, $xr10 + vreplvei.w $vr16, $vr14, 3 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr17, $vr14, 2 + fcvt.d.s $ft9, $ft9 + vextrins.d $vr17, $vr16, 16 + vreplvei.w $vr16, $vr14, 1 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr14, $vr14, 0 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr16, 16 + xvpermi.q $xr14, $xr17, 2 + xvfsub.d $xr16, $xr3, $xr14 + xvfsub.d $xr17, $xr16, $xr11 xvfmul.d $xr15, $xr15, $xr1 - xvfmul.d $xr14, $xr14, $xr3 - xvfmul.d $xr15, $xr15, $xr3 - xvpickve.d $xr16, $xr14, 1 - fcvt.s.d $ft8, $ft8 - xvpickve.d $xr18, $xr14, 0 - fcvt.s.d $ft10, $ft10 - vextrins.w $vr18, $vr16, 16 - xvpickve.d $xr16, $xr14, 2 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr18, $vr16, 32 - xvpickve.d $xr14, $xr14, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr18, $vr14, 48 - xvpickve.d $xr14, $xr15, 1 - fcvt.s.d $ft6, $ft6 - xvpickve.d $xr16, $xr15, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr16, $vr14, 16 - xvpickve.d $xr14, $xr15, 2 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 32 - xvpickve.d $xr14, $xr15, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 48 - xvpermi.q $xr16, $xr18, 2 - xvst $xr16, $t7, 0 - xvfadd.d $xr9, $xr9, $xr13 - xvfadd.d $xr10, $xr10, $xr17 - xvfsub.d $xr9, $xr9, $xr7 - xvfsub.d $xr10, $xr10, $xr8 - xvfmul.d $xr9, $xr9, $xr3 - xvfmul.d $xr10, $xr10, $xr3 + xvfmul.d $xr17, $xr17, $xr1 + xvfmul.d $xr15, $xr15, $xr4 + xvfmul.d $xr17, $xr17, $xr4 + xvfcvt.s.d $xr15, $xr15, $xr17 + xvperm.w $xr15, $xr15, $xr2 + xvst $xr15, $t7, 0 + xvfadd.d $xr8, $xr8, $xr12 + xvfadd.d $xr9, $xr9, $xr14 + xvfsub.d $xr8, $xr8, $xr10 + xvfsub.d $xr9, $xr9, $xr11 + xvfmul.d $xr8, $xr8, $xr4 + xvfmul.d $xr9, $xr9, $xr4 + xvfmul.d $xr8, $xr8, $xr1 xvfmul.d $xr9, $xr9, $xr1 - xvfmul.d $xr10, $xr10, $xr1 - xvpickve.d $xr13, $xr9, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr9, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr9, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr9, $xr9, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr14, $vr9, 48 - xvpickve.d $xr9, $xr10, 1 - fcvt.s.d $ft1, $ft1 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr9, 16 - xvpickve.d $xr9, $xr10, 2 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 32 - xvpickve.d $xr9, $xr10, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t8, 0 - xvfadd.d $xr5, $xr11, $xr5 - xvfadd.d $xr6, $xr12, $xr6 - xvfsub.d $xr5, $xr5, $xr7 - xvfsub.d $xr6, $xr6, $xr8 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr6, $xr6, $xr3 - xvfmul.d $xr5, $xr5, $xr1 + xvfcvt.s.d $xr8, $xr8, $xr9 + xvperm.w $xr8, $xr8, $xr2 + xvst $xr8, $t8, 0 + xvfadd.d $xr6, $xr13, $xr6 + xvfadd.d $xr7, $xr16, $xr7 + xvfsub.d $xr6, $xr6, $xr10 + xvfsub.d $xr7, $xr7, $xr11 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr7, $xr7, $xr4 xvfmul.d $xr6, $xr6, $xr1 - xvpickve.d $xr7, $xr5, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr5, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr5, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr8, $vr5, 48 - xvpickve.d $xr5, $xr6, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr7, $xr6, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr5, 16 - xvpickve.d $xr5, $xr6, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 32 - xvpickve.d $xr5, $xr6, 3 - fcvt.s.d $fa5, $fa5 - xvld $xr6, $s3, 0 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $s0, 0 - xvfsub.s $xr5, $xr4, $xr6 - xvst $xr5, $s4, 0 - addi.d $s5, $s5, -8 - addi.d $s4, $s4, 32 - addi.d $s3, $s3, 32 + xvfmul.d $xr7, $xr7, $xr1 + xvld $xr8, $s0, 0 + xvfcvt.s.d $xr6, $xr6, $xr7 + xvperm.w $xr6, $xr6, $xr2 + xvst $xr6, $fp, 0 + xvfsub.s $xr6, $xr5, $xr8 + xvst $xr6, $s1, 0 + addi.d $s2, $s2, -8 + addi.d $s1, $s1, 32 addi.d $s0, $s0, 32 + addi.d $fp, $fp, 32 addi.d $t8, $t8, 32 addi.d $t7, $t7, 32 addi.d $t6, $t6, 32 addi.d $t5, $t5, 32 addi.d $t4, $t4, 32 addi.d $t3, $t3, 32 - bnez $s5, .LBB3_75 + bnez $s2, .LBB3_75 # %bb.76: # %middle.block - ld.d $s5, $sp, 208 # 8-byte Folded Reload - ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload bne $t1, $t2, .LBB3_27 b .LBB3_29 .LBB3_77: # %vector.memcheck900 @@ -2133,7 +2059,7 @@ Q__align: # @Q__align sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload bnez $s0, .LBB3_32 # %bb.98: # %vector.memcheck900 sltu $s0, $a6, $t4 @@ -2184,239 +2110,156 @@ Q__align: # @Q__align bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 - xvldi $xr4, -1424 + pcalau12i $t3, %pc_hi20(.LCPI3_1) + xvld $xr2, $t3, %pc_lo12(.LCPI3_1) + xvldi $xr3, -912 + xvldi $xr4, -928 + xvldi $xr5, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 move $t6, $a3 move $t7, $a4 move $t8, $a5 - move $s0, $a6 - move $s3, $a7 - move $s4, $t0 - move $s5, $t2 + move $fp, $a6 + move $s0, $a7 + move $s1, $t0 + move $s2, $t2 .p2align 4, , 16 .LBB3_108: # %vector.body1036 # =>This Inner Loop Header: Depth=1 - xvld $xr6, $t3, 0 - xvpermi.q $xr5, $xr6, 1 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 + xvld $xr7, $t3, 0 + xvpermi.q $xr6, $xr7, 1 + vreplvei.w $vr8, $vr6, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 - fcvt.d.s $fa7, $fa7 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvld $xr11, $t4, 0 - xvpermi.q $xr6, $xr8, 2 - xvfsub.d $xr9, $xr2, $xr5 - xvfsub.d $xr10, $xr2, $xr6 - xvpermi.q $xr7, $xr11, 1 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr7, 2 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 vextrins.d $vr7, $vr8, 16 - xvpermi.q $xr7, $xr12, 2 - vreplvei.w $vr8, $vr11, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr11, 2 + xvld $xr11, $t4, 0 + xvpermi.q $xr7, $xr9, 2 + xvfsub.d $xr8, $xr3, $xr6 + xvfsub.d $xr9, $xr3, $xr7 + xvpermi.q $xr10, $xr11, 1 + vreplvei.w $vr12, $vr10, 3 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 - vreplvei.w $vr8, $vr11, 1 - fcvt.d.s $ft5, $ft0 - vreplvei.w $vr8, $vr11, 0 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr13, 16 - xvpermi.q $xr8, $xr12, 2 - xvfsub.d $xr11, $xr9, $xr7 - xvfsub.d $xr12, $xr10, $xr8 - xvfmul.d $xr11, $xr11, $xr1 - xvfmul.d $xr12, $xr12, $xr1 - xvfmul.d $xr11, $xr11, $xr3 - xvfmul.d $xr12, $xr12, $xr3 - xvpickve.d $xr13, $xr11, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr11, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr11, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr11, $xr11, 3 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr14, $vr11, 48 - xvpickve.d $xr11, $xr12, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr13, $xr12, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr11, 16 - xvpickve.d $xr11, $xr12, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 32 - xvpickve.d $xr11, $xr12, 3 - fcvt.s.d $ft3, $ft3 - xvld $xr12, $t6, 0 - vextrins.w $vr13, $vr11, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t5, 0 - xvpermi.q $xr11, $xr12, 1 - vreplvei.w $vr13, $vr11, 3 + vreplvei.w $vr13, $vr10, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr11, 2 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr11, 1 - fcvt.d.s $ft7, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr10, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr12, 16 + xvpermi.q $xr10, $xr13, 2 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 vreplvei.w $vr11, $vr11, 0 - fcvt.d.s $ft5, $ft3 - vextrins.d $vr13, $vr15, 16 - xvpermi.q $xr13, $xr14, 2 - xvfsub.d $xr11, $xr2, $xr13 - xvfsub.d $xr14, $xr11, $xr7 - vreplvei.w $vr15, $vr12, 3 - fcvt.d.s $ft7, $ft7 - vreplvei.w $vr16, $vr12, 2 - fcvt.d.s $ft8, $ft8 - vextrins.d $vr16, $vr15, 16 - vreplvei.w $vr15, $vr12, 1 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfsub.d $xr12, $xr8, $xr10 + xvfsub.d $xr13, $xr9, $xr11 + xvfmul.d $xr12, $xr12, $xr1 + xvfmul.d $xr13, $xr13, $xr1 + xvfmul.d $xr12, $xr12, $xr4 + xvfmul.d $xr13, $xr13, $xr4 + xvld $xr14, $t6, 0 + xvfcvt.s.d $xr12, $xr12, $xr13 + xvperm.w $xr12, $xr12, $xr2 + xvst $xr12, $t5, 0 + xvpermi.q $xr12, $xr14, 1 + vreplvei.w $vr13, $vr12, 3 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr15, $vr12, 2 fcvt.d.s $ft7, $ft7 + vextrins.d $vr15, $vr13, 16 + vreplvei.w $vr13, $vr12, 1 + fcvt.d.s $ft5, $ft5 vreplvei.w $vr12, $vr12, 0 - fcvt.d.s $ft9, $ft4 - vextrins.d $vr17, $vr15, 16 - xvpermi.q $xr17, $xr16, 2 - xvfsub.d $xr12, $xr2, $xr17 - xvfsub.d $xr15, $xr12, $xr8 - xvfmul.d $xr14, $xr14, $xr1 + fcvt.d.s $ft4, $ft4 + vextrins.d $vr12, $vr13, 16 + xvpermi.q $xr12, $xr15, 2 + xvfsub.d $xr13, $xr3, $xr12 + xvfsub.d $xr15, $xr13, $xr10 + vreplvei.w $vr16, $vr14, 3 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr17, $vr14, 2 + fcvt.d.s $ft9, $ft9 + vextrins.d $vr17, $vr16, 16 + vreplvei.w $vr16, $vr14, 1 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr14, $vr14, 0 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr16, 16 + xvpermi.q $xr14, $xr17, 2 + xvfsub.d $xr16, $xr3, $xr14 + xvfsub.d $xr17, $xr16, $xr11 xvfmul.d $xr15, $xr15, $xr1 - xvfmul.d $xr14, $xr14, $xr3 - xvfmul.d $xr15, $xr15, $xr3 - xvpickve.d $xr16, $xr14, 1 - fcvt.s.d $ft8, $ft8 - xvpickve.d $xr18, $xr14, 0 - fcvt.s.d $ft10, $ft10 - vextrins.w $vr18, $vr16, 16 - xvpickve.d $xr16, $xr14, 2 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr18, $vr16, 32 - xvpickve.d $xr14, $xr14, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr18, $vr14, 48 - xvpickve.d $xr14, $xr15, 1 - fcvt.s.d $ft6, $ft6 - xvpickve.d $xr16, $xr15, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr16, $vr14, 16 - xvpickve.d $xr14, $xr15, 2 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 32 - xvpickve.d $xr14, $xr15, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 48 - xvpermi.q $xr16, $xr18, 2 - xvst $xr16, $t7, 0 - xvfadd.d $xr9, $xr9, $xr13 - xvfadd.d $xr10, $xr10, $xr17 - xvfsub.d $xr9, $xr9, $xr7 - xvfsub.d $xr10, $xr10, $xr8 - xvfmul.d $xr9, $xr9, $xr3 - xvfmul.d $xr10, $xr10, $xr3 + xvfmul.d $xr17, $xr17, $xr1 + xvfmul.d $xr15, $xr15, $xr4 + xvfmul.d $xr17, $xr17, $xr4 + xvfcvt.s.d $xr15, $xr15, $xr17 + xvperm.w $xr15, $xr15, $xr2 + xvst $xr15, $t7, 0 + xvfadd.d $xr8, $xr8, $xr12 + xvfadd.d $xr9, $xr9, $xr14 + xvfsub.d $xr8, $xr8, $xr10 + xvfsub.d $xr9, $xr9, $xr11 + xvfmul.d $xr8, $xr8, $xr4 + xvfmul.d $xr9, $xr9, $xr4 + xvfmul.d $xr8, $xr8, $xr1 xvfmul.d $xr9, $xr9, $xr1 - xvfmul.d $xr10, $xr10, $xr1 - xvpickve.d $xr13, $xr9, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr9, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr9, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr9, $xr9, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr14, $vr9, 48 - xvpickve.d $xr9, $xr10, 1 - fcvt.s.d $ft1, $ft1 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr9, 16 - xvpickve.d $xr9, $xr10, 2 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 32 - xvpickve.d $xr9, $xr10, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t8, 0 - xvfadd.d $xr5, $xr11, $xr5 - xvfadd.d $xr6, $xr12, $xr6 - xvfsub.d $xr5, $xr5, $xr7 - xvfsub.d $xr6, $xr6, $xr8 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr6, $xr6, $xr3 - xvfmul.d $xr5, $xr5, $xr1 + xvfcvt.s.d $xr8, $xr8, $xr9 + xvperm.w $xr8, $xr8, $xr2 + xvst $xr8, $t8, 0 + xvfadd.d $xr6, $xr13, $xr6 + xvfadd.d $xr7, $xr16, $xr7 + xvfsub.d $xr6, $xr6, $xr10 + xvfsub.d $xr7, $xr7, $xr11 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr7, $xr7, $xr4 xvfmul.d $xr6, $xr6, $xr1 - xvpickve.d $xr7, $xr5, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr5, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr5, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr8, $vr5, 48 - xvpickve.d $xr5, $xr6, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr7, $xr6, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr5, 16 - xvpickve.d $xr5, $xr6, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 32 - xvpickve.d $xr5, $xr6, 3 - fcvt.s.d $fa5, $fa5 - xvld $xr6, $s3, 0 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $s0, 0 - xvfsub.s $xr5, $xr4, $xr6 - xvst $xr5, $s4, 0 - addi.d $s5, $s5, -8 - addi.d $s4, $s4, 32 - addi.d $s3, $s3, 32 + xvfmul.d $xr7, $xr7, $xr1 + xvld $xr8, $s0, 0 + xvfcvt.s.d $xr6, $xr6, $xr7 + xvperm.w $xr6, $xr6, $xr2 + xvst $xr6, $fp, 0 + xvfsub.s $xr6, $xr5, $xr8 + xvst $xr6, $s1, 0 + addi.d $s2, $s2, -8 + addi.d $s1, $s1, 32 addi.d $s0, $s0, 32 + addi.d $fp, $fp, 32 addi.d $t8, $t8, 32 addi.d $t7, $t7, 32 addi.d $t6, $t6, 32 addi.d $t5, $t5, 32 addi.d $t4, $t4, 32 addi.d $t3, $t3, 32 - bnez $s5, .LBB3_108 + bnez $s2, .LBB3_108 # %bb.109: # %middle.block1050 - ld.d $s5, $sp, 208 # 8-byte Folded Reload - ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload bne $t1, $t2, .LBB3_32 b .LBB3_34 .LBB3_110: # %clearvec.exit.thread @@ -2464,7 +2307,7 @@ Q__align: # @Q__align ld.d $a1, $sp, 232 # 8-byte Folded Reload or $a0, $a1, $a0 move $ra, $fp - xvld $xr19, $sp, 368 # 32-byte Folded Reload + xvld $xr18, $sp, 368 # 32-byte Folded Reload move $t3, $s0 move $t4, $s1 bnez $a0, .LBB3_122 @@ -2643,73 +2486,54 @@ Q__align: # @Q__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 + pcalau12i $a5, %pc_hi20(.LCPI3_2) + xvld $xr0, $a5, %pc_lo12(.LCPI3_2) pcalau12i $a5, %pc_hi20(.LCPI3_1) - xvld $xr0, $a5, %pc_lo12(.LCPI3_1) - xvreplgr2vr.w $xr1, $a0 + xvld $xr1, $a5, %pc_lo12(.LCPI3_1) + xvreplgr2vr.w $xr2, $a0 addi.d $a5, $t3, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_135: # %vector.body1099 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -2763,73 +2587,54 @@ Q__align: # @Q__align ori $a6, $zero, 1 move $a4, $a3 bstrins.d $a4, $a6, 2, 0 + pcalau12i $a6, %pc_hi20(.LCPI3_2) + xvld $xr0, $a6, %pc_lo12(.LCPI3_2) pcalau12i $a6, %pc_hi20(.LCPI3_1) - xvld $xr0, $a6, %pc_lo12(.LCPI3_1) - xvreplgr2vr.w $xr1, $a0 + xvld $xr1, $a6, %pc_lo12(.LCPI3_1) + xvreplgr2vr.w $xr2, $a0 addi.d $a6, $a1, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a7, $a5 .p2align 4, , 16 .LBB3_143: # %vector.body1114 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a6, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a6, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a6, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a6, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a7, $a7, -8 addi.d $a6, $a6, 32 @@ -3002,7 +2807,7 @@ Q__align: # @Q__align andi $a5, $a3, 12 move $a4, $a3 bstrins.d $a4, $zero, 3, 0 - xvreplve0.w $xr0, $xr19 + xvreplve0.w $xr0, $xr18 addi.d $a7, $a1, 36 xvrepli.b $xr1, 0 xvreplgr2vr.w $xr2, $s3 @@ -3034,7 +2839,7 @@ Q__align: # @Q__align ori $a7, $zero, 1 move $a5, $a3 bstrins.d $a5, $a7, 1, 0 - vreplvei.w $vr0, $vr19, 0 + vreplvei.w $vr0, $vr18, 0 addi.d $a7, $a1, 4 slli.d $t0, $a4, 2 sub.d $a4, $a4, $a6 @@ -3063,7 +2868,7 @@ Q__align: # @Q__align # =>This Inner Loop Header: Depth=1 fldx.s $fa1, $a4, $a3 stx.w $zero, $a1, $a3 - fmadd.s $fa1, $ft11, $fa0, $fa1 + fmadd.s $fa1, $ft10, $fa0, $fa1 fstx.s $fa1, $a0, $a3 addi.d $a2, $a2, -1 addi.d $a3, $a3, 4 @@ -3160,7 +2965,7 @@ Q__align: # @Q__align bstrpick.d $t0, $t0, 31, 0 slli.d $t1, $t0, 2 addi.d $t1, $t1, 4 - st.d $t1, $sp, 152 # 8-byte Folded Spill + st.d $t1, $sp, 144 # 8-byte Folded Spill addi.d $t1, $t0, 1 bstrpick.d $t0, $t1, 32, 4 movgr2fr.w $fs1, $s3 @@ -3172,12 +2977,12 @@ Q__align: # @Q__align slli.d $t0, $t0, 4 st.d $t0, $sp, 80 # 8-byte Folded Spill addi.d $s4, $a0, 4 - addi.d $s2, $a1, 4 + addi.d $fp, $a1, 4 move $a0, $s6 addi.d $s6, $a2, 4 addi.d $s0, $a4, 4 - addi.d $s7, $a5, 4 - addi.d $fp, $a6, 4 + addi.d $s2, $a5, 4 + addi.d $s7, $a6, 4 addi.d $s5, $a3, 4 addi.d $s3, $a7, 4 movgr2fr.w $fs0, $zero @@ -3191,15 +2996,15 @@ Q__align: # @Q__align ld.d $a1, $sp, 104 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.ijp) st.d $a1, $sp, 256 # 8-byte Folded Spill - st.d $t1, $sp, 120 # 8-byte Folded Spill + st.d $t1, $sp, 128 # 8-byte Folded Spill andi $a1, $t1, 12 st.d $a1, $sp, 40 # 8-byte Folded Spill - st.d $t7, $sp, 128 # 8-byte Folded Spill + st.d $t7, $sp, 136 # 8-byte Folded Spill sub.d $a1, $a0, $t7 st.d $a1, $sp, 24 # 8-byte Folded Spill sub.d $a0, $a0, $t8 st.d $a0, $sp, 72 # 8-byte Folded Spill - st.d $t8, $sp, 112 # 8-byte Folded Spill + st.d $t8, $sp, 120 # 8-byte Folded Spill sub.d $a0, $zero, $t8 st.d $a0, $sp, 64 # 8-byte Folded Spill ori $s8, $zero, 1 @@ -3286,7 +3091,7 @@ Q__align: # @Q__align bltu $a0, $a1, .LBB3_190 # %bb.188: # %vector.memcheck1237 # in Loop: Header=BB3_183 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 144 # 8-byte Folded Reload add.d $a0, $a2, $a1 bgeu $t2, $a0, .LBB3_201 # %bb.189: # %vector.memcheck1237 @@ -3369,12 +3174,12 @@ Q__align: # @Q__align .LBB3_195: # %.lr.ph621 # Parent Loop BB3_183 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.s $ft2, $s7, $a3 + fldx.s $ft2, $s2, $a3 fldx.s $ft1, $t4, $a3 fldx.s $ft3, $s3, $a3 fmul.s $ft2, $fa7, $ft2 fadd.s $ft2, $ft1, $ft2 - fldx.s $ft4, $fp, $a3 + fldx.s $ft4, $s7, $a3 fmul.s $ft3, $fa2, $ft3 fadd.s $ft2, $ft2, $ft3 fldx.s $ft3, $s5, $a3 @@ -3402,7 +3207,7 @@ Q__align: # @Q__align fcmp.cule.s $fcc0, $ft4, $fs0 bcnez $fcc0, .LBB3_199 # %bb.198: # in Loop: Header=BB3_195 Depth=2 - ldx.w $t1, $s2, $a3 + ldx.w $t1, $fp, $a3 sub.d $t1, $s8, $t1 stx.w $t1, $a6, $a3 fmov.s $fs0, $ft4 @@ -3414,7 +3219,7 @@ Q__align: # @Q__align bcnez $fcc0, .LBB3_194 # %bb.200: # in Loop: Header=BB3_195 Depth=2 fstx.s $ft4, $s4, $a3 - stx.w $t7, $s2, $a3 + stx.w $t7, $fp, $a3 b .LBB3_194 .LBB3_201: # %vector.main.loop.iter.check1248 # in Loop: Header=BB3_183 Depth=1 @@ -3428,7 +3233,7 @@ Q__align: # @Q__align # in Loop: Header=BB3_183 Depth=1 addi.d $a0, $t2, 32 addi.d $a1, $a2, 32 - ld.d $a3, $sp, 128 # 8-byte Folded Reload + ld.d $a3, $sp, 136 # 8-byte Folded Reload .p2align 4, , 16 .LBB3_204: # %vector.body1252 # Parent Loop BB3_183 Depth=1 @@ -3447,12 +3252,12 @@ Q__align: # @Q__align bnez $a3, .LBB3_204 # %bb.205: # %middle.block1261 # in Loop: Header=BB3_183 Depth=1 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 136 # 8-byte Folded Reload beq $a0, $a1, .LBB3_192 # %bb.206: # %vec.epilog.iter.check1265 # in Loop: Header=BB3_183 Depth=1 - ld.d $a4, $sp, 128 # 8-byte Folded Reload + ld.d $a4, $sp, 136 # 8-byte Folded Reload ld.d $a0, $sp, 40 # 8-byte Folded Reload beqz $a0, .LBB3_210 .LBB3_207: # %vec.epilog.ph1267 @@ -3479,8 +3284,8 @@ Q__align: # @Q__align # %bb.209: # %vec.epilog.middle.block1280 # in Loop: Header=BB3_183 Depth=1 ld.d $a2, $sp, 72 # 8-byte Folded Reload - ld.d $a3, $sp, 120 # 8-byte Folded Reload - ld.d $a4, $sp, 112 # 8-byte Folded Reload + ld.d $a3, $sp, 128 # 8-byte Folded Reload + ld.d $a4, $sp, 120 # 8-byte Folded Reload bne $a3, $a4, .LBB3_191 b .LBB3_192 .LBB3_210: # in Loop: Header=BB3_183 Depth=1 @@ -3499,8 +3304,8 @@ Q__align: # @Q__align move $t3, $t2 ld.d $s7, $sp, 96 # 8-byte Folded Reload ld.d $s5, $sp, 208 # 8-byte Folded Reload - ld.d $s4, $sp, 136 # 8-byte Folded Reload - ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s4, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload ld.d $s6, $sp, 240 # 8-byte Folded Reload bnez $a1, .LBB3_227 .LBB3_213: # %.preheader555 @@ -3523,19 +3328,29 @@ Q__align: # @Q__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI3_2) - xvld $xr1, $a5, %pc_lo12(.LCPI3_2) pcalau12i $a5, %pc_hi20(.LCPI3_3) - xvld $xr2, $a5, %pc_lo12(.LCPI3_3) - xvreplgr2vr.d $xr3, $s6 + xvld $xr1, $a5, %pc_lo12(.LCPI3_3) + pcalau12i $a5, %pc_hi20(.LCPI3_4) + xvld $xr2, $a5, %pc_lo12(.LCPI3_4) + pcalau12i $a5, %pc_hi20(.LCPI3_1) + xvld $xr3, $a5, %pc_lo12(.LCPI3_1) + xvreplgr2vr.d $xr4, $s6 addi.d $a5, $t3, 4 - xvldi $xr4, -800 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_216: # %vector.body1298 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr3, $xr1 - xvsub.d $xr6, $xr3, $xr2 + xvsub.d $xr6, $xr4, $xr1 + xvsub.d $xr7, $xr4, $xr2 + xvpickve2gr.d $a7, $xr7, 0 + vinsgr2vr.w $vr8, $a7, 0 + xvpickve2gr.d $a7, $xr7, 1 + vinsgr2vr.w $vr8, $a7, 1 + xvpickve2gr.d $a7, $xr7, 2 + vinsgr2vr.w $vr8, $a7, 2 + xvpickve2gr.d $a7, $xr7, 3 + vinsgr2vr.w $vr8, $a7, 3 xvpickve2gr.d $a7, $xr6, 0 vinsgr2vr.w $vr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -3544,73 +3359,44 @@ Q__align: # @Q__align vinsgr2vr.w $vr7, $a7, 2 xvpickve2gr.d $a7, $xr6, 3 vinsgr2vr.w $vr7, $a7, 3 - xvpickve2gr.d $a7, $xr5, 0 - vinsgr2vr.w $vr6, $a7, 0 - xvpickve2gr.d $a7, $xr5, 1 - vinsgr2vr.w $vr6, $a7, 1 - xvpickve2gr.d $a7, $xr5, 2 - vinsgr2vr.w $vr6, $a7, 2 - xvpickve2gr.d $a7, $xr5, 3 - vinsgr2vr.w $vr6, $a7, 3 - xvpermi.q $xr7, $xr6, 2 - xvmul.w $xr5, $xr0, $xr7 - xvpermi.q $xr6, $xr5, 1 + xvpermi.q $xr8, $xr7, 2 + xvmul.w $xr6, $xr0, $xr8 + xvpermi.q $xr7, $xr6, 1 + vext2xv.d.w $xr7, $xr7 + xvffint.d.l $xr7, $xr7 vext2xv.d.w $xr6, $xr6 + xvld $xr8, $a5, 0 xvffint.d.l $xr6, $xr6 - vext2xv.d.w $xr5, $xr5 - xvld $xr7, $a5, 0 - xvffint.d.l $xr5, $xr5 - xvfmul.d $xr5, $xr5, $xr4 - xvfmul.d $xr6, $xr6, $xr4 - xvpermi.q $xr8, $xr7, 1 - vreplvei.w $vr9, $vr8, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr8, 2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvpermi.q $xr9, $xr8, 1 + vreplvei.w $vr10, $vr9, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr9, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr9, 1 fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr8, 1 + vreplvei.w $vr9, $vr9, 0 fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr10, 16 + xvpermi.q $xr9, $xr11, 2 + vreplvei.w $vr10, $vr8, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr8, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr8, 1 + fcvt.d.s $ft2, $ft2 vreplvei.w $vr8, $vr8, 0 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr9, 16 - xvpermi.q $xr8, $xr10, 2 - vreplvei.w $vr9, $vr7, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr7, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr7, 1 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr9, 16 - xvpermi.q $xr7, $xr10, 2 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr8, $xr11, 2 + xvfadd.d $xr7, $xr9, $xr7 xvfadd.d $xr6, $xr8, $xr6 - xvfadd.d $xr5, $xr7, $xr5 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $a5, 0 + xvfcvt.s.d $xr6, $xr7, $xr6 + xvperm.w $xr6, $xr6, $xr3 + xvst $xr6, $a5, 0 xvaddi.du $xr2, $xr2, 8 xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 @@ -3666,8 +3452,8 @@ Q__align: # @Q__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - pcalau12i $a5, %pc_hi20(.LCPI3_4) - vld $vr3, $a5, %pc_lo12(.LCPI3_4) + pcalau12i $a5, %pc_hi20(.LCPI3_5) + vld $vr3, $a5, %pc_lo12(.LCPI3_5) xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 xvldi $xr5, -800 @@ -3692,18 +3478,9 @@ Q__align: # @Q__align vextrins.d $vr7, $vr8, 16 xvpermi.q $xr7, $xr9, 2 xvfmadd.d $xr6, $xr4, $xr6, $xr7 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - vst $vr8, $a5, 0 + xvpermi.q $xr7, $xr6, 1 + vfcvt.s.d $vr6, $vr7, $vr6 + vst $vr6, $a5, 0 vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 @@ -3736,7 +3513,7 @@ Q__align: # @Q__align ld.d $s3, $a0, %pc_lo12(Q__align.lastverticalw) ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $s6, $a0, %pc_lo12(Q__align.mseq1) - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $s4, $a0, %pc_lo12(Q__align.mseq2) ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $s2, $a0, %pc_lo12(Q__align.ijp) @@ -3852,8 +3629,8 @@ Q__align: # @Q__align move $a3, $zero b .LBB3_246 .LBB3_243: # %vector.ph1329 - pcalau12i $a4, %pc_hi20(.LCPI3_5) - vld $vr0, $a4, %pc_lo12(.LCPI3_5) + pcalau12i $a4, %pc_hi20(.LCPI3_6) + vld $vr0, $a4, %pc_lo12(.LCPI3_6) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s2, 16 @@ -3916,8 +3693,8 @@ Q__align: # @Q__align andi $a7, $a6, 12 bstrpick.d $a5, $a6, 31, 4 slli.d $a5, $a5, 4 - pcalau12i $t0, %pc_hi20(.LCPI3_6) - xvld $xr0, $t0, %pc_lo12(.LCPI3_6) + pcalau12i $t0, %pc_hi20(.LCPI3_7) + xvld $xr0, $t0, %pc_lo12(.LCPI3_7) addi.d $t0, $a3, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -3939,8 +3716,8 @@ Q__align: # @Q__align beqz $a7, .LBB3_260 .LBB3_257: # %vec.epilog.ph1363 move $a7, $a5 - pcalau12i $a5, %pc_hi20(.LCPI3_7) - vld $vr0, $a5, %pc_lo12(.LCPI3_7) + pcalau12i $a5, %pc_hi20(.LCPI3_8) + vld $vr0, $a5, %pc_lo12(.LCPI3_8) bstrpick.d $a5, $a6, 31, 2 slli.d $a5, $a5, 2 vreplgr2vr.w $vr1, $a7 @@ -4278,7 +4055,7 @@ Q__align: # @Q__align .LBB3_307: # %.preheader blez $s7, .LBB3_310 # %bb.308: # %.lr.ph644 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(Q__align.mseq2) .p2align 4, , 16 .LBB3_309: # =>This Inner Loop Header: Depth=1 @@ -4381,10 +4158,10 @@ Q__align: # @Q__align alsl.d $a4, $a5, $t3, 2 b .LBB3_121 .LBB3_321: - ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload b .LBB3_27 .LBB3_322: - ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 160 # 8-byte Folded Reload b .LBB3_32 .Lfunc_end3: .size Q__align, .Lfunc_end3-Q__align @@ -5466,6 +5243,15 @@ Atracking: # @Atracking .section .rodata.cst32,"aM",@progbits,32 .p2align 5, 0x0 .LCPI6_1: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI6_2: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 @@ -5474,17 +5260,17 @@ Atracking: # @Atracking .word 6 # 0x6 .word 7 # 0x7 .word 8 # 0x8 -.LCPI6_2: +.LCPI6_3: .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 .dword 8 # 0x8 -.LCPI6_3: +.LCPI6_4: .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI6_6: +.LCPI6_7: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -5495,15 +5281,15 @@ Atracking: # @Atracking .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI6_4: +.LCPI6_5: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI6_5: +.LCPI6_6: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI6_7: +.LCPI6_8: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -5544,7 +5330,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.w $fp, $a0, 0 pcalau12i $s3, %pc_hi20(Q__align_gapmap.mseq1) pcalau12i $a0, %pc_hi20(Q__align_gapmap.mseq2) - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill bnez $s6, .LBB6_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) @@ -5563,7 +5349,7 @@ Q__align_gapmap: # @Q__align_gapmap jirl $ra, $ra, 0 ld.d $a1, $sp, 304 # 8-byte Folded Reload ld.w $s6, $a1, %pc_lo12(Q__align_gapmap.orlgth1) - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.mseq2) .LBB6_2: st.d $s0, $sp, 440 # 8-byte Folded Spill @@ -5633,7 +5419,7 @@ Q__align_gapmap: # @Q__align_gapmap pcalau12i $a0, %pc_hi20(Q__align_gapmap.fg_t_og_h_dg_n1_p) st.d $a0, $sp, 184 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.fg_t_og_h_dg_n2_p) - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.gapz_n1) st.d $a0, $sp, 192 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.gapz_n2) @@ -5645,9 +5431,9 @@ Q__align_gapmap: # @Q__align_gapmap st.d $a0, $sp, 248 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.intwork) st.d $a0, $sp, 240 # 8-byte Folded Spill - st.d $s3, $sp, 120 # 8-byte Folded Spill + st.d $s3, $sp, 112 # 8-byte Folded Spill st.d $s1, $sp, 80 # 8-byte Folded Spill - st.d $s2, $sp, 96 # 8-byte Folded Spill + st.d $s2, $sp, 104 # 8-byte Folded Spill st.d $a6, $sp, 368 # 8-byte Folded Spill move $s4, $a1 st.d $a1, $sp, 264 # 8-byte Folded Spill @@ -5771,7 +5557,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -5968,7 +5754,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -6014,8 +5800,8 @@ Q__align_gapmap: # @Q__align_gapmap st.w $s6, $s8, %pc_lo12(Q__align_gapmap.orlgth1) ld.d $a0, $sp, 72 # 8-byte Folded Reload st.w $s0, $a0, %pc_lo12(Q__align_gapmap.orlgth2) - ld.d $s3, $sp, 120 # 8-byte Folded Reload - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s3, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload ld.d $a6, $sp, 368 # 8-byte Folded Reload ld.d $s8, $sp, 136 # 8-byte Folded Reload ld.d $fp, $sp, 32 # 8-byte Folded Reload @@ -6043,7 +5829,7 @@ Q__align_gapmap: # @Q__align_gapmap blez $a5, .LBB6_14 # %bb.12: # %.lr.ph564 ld.d $a1, $a7, %pc_lo12(Q__align_gapmap.mseq) - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.mseq2) ld.d $a2, $sp, 448 # 8-byte Folded Reload alsl.d $a1, $a2, $a1, 3 @@ -6111,7 +5897,7 @@ Q__align_gapmap: # @Q__align_gapmap st.d $a0, $a1, 0 st.w $s2, $fp, 0 st.w $s0, $s1, 0 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload .LBB6_21: ld.d $a1, $s4, %pc_lo12(Q__align_gapmap.cpmx1) ffint.s.w $fa0, $fs0 @@ -6379,12 +6165,12 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_24: ld.d $s4, $sp, 80 # 8-byte Folded Reload addi.w $a0, $zero, -1 - xvld $xr19, $sp, 304 # 32-byte Folded Reload - fcvt.d.s $fa0, $ft11 + xvld $xr18, $sp, 304 # 32-byte Folded Reload + fcvt.d.s $fa0, $ft10 ld.d $ra, $sp, 408 # 8-byte Folded Reload st.d $a0, $sp, 296 # 8-byte Folded Spill ld.d $s8, $sp, 256 # 8-byte Folded Reload - ld.d $s5, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload blt $s1, $a0, .LBB6_29 # %bb.25: # %.lr.ph567 ld.d $a0, $sp, 416 # 8-byte Folded Reload @@ -6399,7 +6185,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_h_dg_n2_p) ld.d $a5, $sp, 176 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) - ld.d $a6, $sp, 104 # 8-byte Folded Reload + ld.d $a6, $sp, 96 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) ld.d $a7, $sp, 384 # 8-byte Folded Reload ld.d $a7, $a7, %pc_lo12(Q__align_gapmap.gapz2) @@ -6626,7 +6412,7 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_38: # %clearvec.exit ld.d $ra, $sp, 408 # 8-byte Folded Reload ld.d $t4, $sp, 368 # 8-byte Folded Reload - xvld $xr19, $sp, 304 # 32-byte Folded Reload + xvld $xr18, $sp, 304 # 32-byte Folded Reload ld.d $a0, $sp, 56 # 8-byte Folded Reload move $t5, $s0 move $t6, $s1 @@ -6776,7 +6562,7 @@ Q__align_gapmap: # @Q__align_gapmap sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bnez $s0, .LBB6_27 # %bb.65: # %vector.memcheck sltu $s0, $a6, $t4 @@ -6827,9 +6613,11 @@ Q__align_gapmap: # @Q__align_gapmap bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 - xvldi $xr4, -1424 + pcalau12i $t3, %pc_hi20(.LCPI6_1) + xvld $xr2, $t3, %pc_lo12(.LCPI6_1) + xvldi $xr3, -912 + xvldi $xr4, -928 + xvldi $xr5, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 @@ -6837,218 +6625,134 @@ Q__align_gapmap: # @Q__align_gapmap move $t7, $a4 move $t8, $a5 move $s0, $a6 - move $s3, $a7 - move $s4, $t0 - move $s5, $t2 + move $s2, $a7 + move $s3, $t0 + move $s4, $t2 .p2align 4, , 16 .LBB6_75: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr6, $t3, 0 - xvpermi.q $xr5, $xr6, 1 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 + xvld $xr7, $t3, 0 + xvpermi.q $xr6, $xr7, 1 + vreplvei.w $vr8, $vr6, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 - fcvt.d.s $fa7, $fa7 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvld $xr11, $t4, 0 - xvpermi.q $xr6, $xr8, 2 - xvfsub.d $xr9, $xr2, $xr5 - xvfsub.d $xr10, $xr2, $xr6 - xvpermi.q $xr7, $xr11, 1 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr7, 2 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 vextrins.d $vr7, $vr8, 16 - xvpermi.q $xr7, $xr12, 2 - vreplvei.w $vr8, $vr11, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr11, 2 + xvld $xr11, $t4, 0 + xvpermi.q $xr7, $xr9, 2 + xvfsub.d $xr8, $xr3, $xr6 + xvfsub.d $xr9, $xr3, $xr7 + xvpermi.q $xr10, $xr11, 1 + vreplvei.w $vr12, $vr10, 3 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 - vreplvei.w $vr8, $vr11, 1 - fcvt.d.s $ft5, $ft0 - vreplvei.w $vr8, $vr11, 0 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr13, 16 - xvpermi.q $xr8, $xr12, 2 - xvfsub.d $xr11, $xr9, $xr7 - xvfsub.d $xr12, $xr10, $xr8 - xvfmul.d $xr11, $xr11, $xr1 - xvfmul.d $xr12, $xr12, $xr1 - xvfmul.d $xr11, $xr11, $xr3 - xvfmul.d $xr12, $xr12, $xr3 - xvpickve.d $xr13, $xr11, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr11, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr11, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr11, $xr11, 3 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr14, $vr11, 48 - xvpickve.d $xr11, $xr12, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr13, $xr12, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr11, 16 - xvpickve.d $xr11, $xr12, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 32 - xvpickve.d $xr11, $xr12, 3 - fcvt.s.d $ft3, $ft3 - xvld $xr12, $t6, 0 - vextrins.w $vr13, $vr11, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t5, 0 - xvpermi.q $xr11, $xr12, 1 - vreplvei.w $vr13, $vr11, 3 + vreplvei.w $vr13, $vr10, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr11, 2 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr11, 1 - fcvt.d.s $ft7, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr10, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr12, 16 + xvpermi.q $xr10, $xr13, 2 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 vreplvei.w $vr11, $vr11, 0 - fcvt.d.s $ft5, $ft3 - vextrins.d $vr13, $vr15, 16 - xvpermi.q $xr13, $xr14, 2 - xvfsub.d $xr11, $xr2, $xr13 - xvfsub.d $xr14, $xr11, $xr7 - vreplvei.w $vr15, $vr12, 3 - fcvt.d.s $ft7, $ft7 - vreplvei.w $vr16, $vr12, 2 - fcvt.d.s $ft8, $ft8 - vextrins.d $vr16, $vr15, 16 - vreplvei.w $vr15, $vr12, 1 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfsub.d $xr12, $xr8, $xr10 + xvfsub.d $xr13, $xr9, $xr11 + xvfmul.d $xr12, $xr12, $xr1 + xvfmul.d $xr13, $xr13, $xr1 + xvfmul.d $xr12, $xr12, $xr4 + xvfmul.d $xr13, $xr13, $xr4 + xvld $xr14, $t6, 0 + xvfcvt.s.d $xr12, $xr12, $xr13 + xvperm.w $xr12, $xr12, $xr2 + xvst $xr12, $t5, 0 + xvpermi.q $xr12, $xr14, 1 + vreplvei.w $vr13, $vr12, 3 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr15, $vr12, 2 fcvt.d.s $ft7, $ft7 + vextrins.d $vr15, $vr13, 16 + vreplvei.w $vr13, $vr12, 1 + fcvt.d.s $ft5, $ft5 vreplvei.w $vr12, $vr12, 0 - fcvt.d.s $ft9, $ft4 - vextrins.d $vr17, $vr15, 16 - xvpermi.q $xr17, $xr16, 2 - xvfsub.d $xr12, $xr2, $xr17 - xvfsub.d $xr15, $xr12, $xr8 - xvfmul.d $xr14, $xr14, $xr1 + fcvt.d.s $ft4, $ft4 + vextrins.d $vr12, $vr13, 16 + xvpermi.q $xr12, $xr15, 2 + xvfsub.d $xr13, $xr3, $xr12 + xvfsub.d $xr15, $xr13, $xr10 + vreplvei.w $vr16, $vr14, 3 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr17, $vr14, 2 + fcvt.d.s $ft9, $ft9 + vextrins.d $vr17, $vr16, 16 + vreplvei.w $vr16, $vr14, 1 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr14, $vr14, 0 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr16, 16 + xvpermi.q $xr14, $xr17, 2 + xvfsub.d $xr16, $xr3, $xr14 + xvfsub.d $xr17, $xr16, $xr11 xvfmul.d $xr15, $xr15, $xr1 - xvfmul.d $xr14, $xr14, $xr3 - xvfmul.d $xr15, $xr15, $xr3 - xvpickve.d $xr16, $xr14, 1 - fcvt.s.d $ft8, $ft8 - xvpickve.d $xr18, $xr14, 0 - fcvt.s.d $ft10, $ft10 - vextrins.w $vr18, $vr16, 16 - xvpickve.d $xr16, $xr14, 2 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr18, $vr16, 32 - xvpickve.d $xr14, $xr14, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr18, $vr14, 48 - xvpickve.d $xr14, $xr15, 1 - fcvt.s.d $ft6, $ft6 - xvpickve.d $xr16, $xr15, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr16, $vr14, 16 - xvpickve.d $xr14, $xr15, 2 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 32 - xvpickve.d $xr14, $xr15, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 48 - xvpermi.q $xr16, $xr18, 2 - xvst $xr16, $t7, 0 - xvfadd.d $xr9, $xr9, $xr13 - xvfadd.d $xr10, $xr10, $xr17 - xvfsub.d $xr9, $xr9, $xr7 - xvfsub.d $xr10, $xr10, $xr8 - xvfmul.d $xr9, $xr9, $xr3 - xvfmul.d $xr10, $xr10, $xr3 + xvfmul.d $xr17, $xr17, $xr1 + xvfmul.d $xr15, $xr15, $xr4 + xvfmul.d $xr17, $xr17, $xr4 + xvfcvt.s.d $xr15, $xr15, $xr17 + xvperm.w $xr15, $xr15, $xr2 + xvst $xr15, $t7, 0 + xvfadd.d $xr8, $xr8, $xr12 + xvfadd.d $xr9, $xr9, $xr14 + xvfsub.d $xr8, $xr8, $xr10 + xvfsub.d $xr9, $xr9, $xr11 + xvfmul.d $xr8, $xr8, $xr4 + xvfmul.d $xr9, $xr9, $xr4 + xvfmul.d $xr8, $xr8, $xr1 xvfmul.d $xr9, $xr9, $xr1 - xvfmul.d $xr10, $xr10, $xr1 - xvpickve.d $xr13, $xr9, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr9, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr9, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr9, $xr9, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr14, $vr9, 48 - xvpickve.d $xr9, $xr10, 1 - fcvt.s.d $ft1, $ft1 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr9, 16 - xvpickve.d $xr9, $xr10, 2 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 32 - xvpickve.d $xr9, $xr10, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t8, 0 - xvfadd.d $xr5, $xr11, $xr5 - xvfadd.d $xr6, $xr12, $xr6 - xvfsub.d $xr5, $xr5, $xr7 - xvfsub.d $xr6, $xr6, $xr8 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr6, $xr6, $xr3 - xvfmul.d $xr5, $xr5, $xr1 + xvfcvt.s.d $xr8, $xr8, $xr9 + xvperm.w $xr8, $xr8, $xr2 + xvst $xr8, $t8, 0 + xvfadd.d $xr6, $xr13, $xr6 + xvfadd.d $xr7, $xr16, $xr7 + xvfsub.d $xr6, $xr6, $xr10 + xvfsub.d $xr7, $xr7, $xr11 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr7, $xr7, $xr4 xvfmul.d $xr6, $xr6, $xr1 - xvpickve.d $xr7, $xr5, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr5, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr5, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr8, $vr5, 48 - xvpickve.d $xr5, $xr6, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr7, $xr6, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr5, 16 - xvpickve.d $xr5, $xr6, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 32 - xvpickve.d $xr5, $xr6, 3 - fcvt.s.d $fa5, $fa5 - xvld $xr6, $s3, 0 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $s0, 0 - xvfsub.s $xr5, $xr4, $xr6 - xvst $xr5, $s4, 0 - addi.d $s5, $s5, -8 - addi.d $s4, $s4, 32 + xvfmul.d $xr7, $xr7, $xr1 + xvld $xr8, $s2, 0 + xvfcvt.s.d $xr6, $xr6, $xr7 + xvperm.w $xr6, $xr6, $xr2 + xvst $xr6, $s0, 0 + xvfsub.s $xr6, $xr5, $xr8 + xvst $xr6, $s3, 0 + addi.d $s4, $s4, -8 addi.d $s3, $s3, 32 + addi.d $s2, $s2, 32 addi.d $s0, $s0, 32 addi.d $t8, $t8, 32 addi.d $t7, $t7, 32 @@ -7056,10 +6760,10 @@ Q__align_gapmap: # @Q__align_gapmap addi.d $t5, $t5, 32 addi.d $t4, $t4, 32 addi.d $t3, $t3, 32 - bnez $s5, .LBB6_75 + bnez $s4, .LBB6_75 # %bb.76: # %middle.block - ld.d $s5, $sp, 120 # 8-byte Folded Reload ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bne $t1, $t2, .LBB6_27 b .LBB6_29 .LBB6_77: # %vector.memcheck883 @@ -7176,7 +6880,7 @@ Q__align_gapmap: # @Q__align_gapmap sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bnez $s0, .LBB6_32 # %bb.98: # %vector.memcheck883 sltu $s0, $a6, $t4 @@ -7227,9 +6931,11 @@ Q__align_gapmap: # @Q__align_gapmap bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 - xvldi $xr4, -1424 + pcalau12i $t3, %pc_hi20(.LCPI6_1) + xvld $xr2, $t3, %pc_lo12(.LCPI6_1) + xvldi $xr3, -912 + xvldi $xr4, -928 + xvldi $xr5, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 @@ -7237,218 +6943,134 @@ Q__align_gapmap: # @Q__align_gapmap move $t7, $a4 move $t8, $a5 move $s0, $a6 - move $s3, $a7 - move $s4, $t0 - move $s5, $t2 + move $s2, $a7 + move $s3, $t0 + move $s4, $t2 .p2align 4, , 16 .LBB6_108: # %vector.body1019 # =>This Inner Loop Header: Depth=1 - xvld $xr6, $t3, 0 - xvpermi.q $xr5, $xr6, 1 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 + xvld $xr7, $t3, 0 + xvpermi.q $xr6, $xr7, 1 + vreplvei.w $vr8, $vr6, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 - fcvt.d.s $fa7, $fa7 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvld $xr11, $t4, 0 - xvpermi.q $xr6, $xr8, 2 - xvfsub.d $xr9, $xr2, $xr5 - xvfsub.d $xr10, $xr2, $xr6 - xvpermi.q $xr7, $xr11, 1 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr7, 2 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 vextrins.d $vr7, $vr8, 16 - xvpermi.q $xr7, $xr12, 2 - vreplvei.w $vr8, $vr11, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr11, 2 + xvld $xr11, $t4, 0 + xvpermi.q $xr7, $xr9, 2 + xvfsub.d $xr8, $xr3, $xr6 + xvfsub.d $xr9, $xr3, $xr7 + xvpermi.q $xr10, $xr11, 1 + vreplvei.w $vr12, $vr10, 3 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 - vreplvei.w $vr8, $vr11, 1 - fcvt.d.s $ft5, $ft0 - vreplvei.w $vr8, $vr11, 0 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr13, 16 - xvpermi.q $xr8, $xr12, 2 - xvfsub.d $xr11, $xr9, $xr7 - xvfsub.d $xr12, $xr10, $xr8 - xvfmul.d $xr11, $xr11, $xr1 - xvfmul.d $xr12, $xr12, $xr1 - xvfmul.d $xr11, $xr11, $xr3 - xvfmul.d $xr12, $xr12, $xr3 - xvpickve.d $xr13, $xr11, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr11, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr11, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr11, $xr11, 3 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr14, $vr11, 48 - xvpickve.d $xr11, $xr12, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr13, $xr12, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr11, 16 - xvpickve.d $xr11, $xr12, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 32 - xvpickve.d $xr11, $xr12, 3 - fcvt.s.d $ft3, $ft3 - xvld $xr12, $t6, 0 - vextrins.w $vr13, $vr11, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t5, 0 - xvpermi.q $xr11, $xr12, 1 - vreplvei.w $vr13, $vr11, 3 + vreplvei.w $vr13, $vr10, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr11, 2 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr11, 1 - fcvt.d.s $ft7, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr10, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr12, 16 + xvpermi.q $xr10, $xr13, 2 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 vreplvei.w $vr11, $vr11, 0 - fcvt.d.s $ft5, $ft3 - vextrins.d $vr13, $vr15, 16 - xvpermi.q $xr13, $xr14, 2 - xvfsub.d $xr11, $xr2, $xr13 - xvfsub.d $xr14, $xr11, $xr7 - vreplvei.w $vr15, $vr12, 3 - fcvt.d.s $ft7, $ft7 - vreplvei.w $vr16, $vr12, 2 - fcvt.d.s $ft8, $ft8 - vextrins.d $vr16, $vr15, 16 - vreplvei.w $vr15, $vr12, 1 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfsub.d $xr12, $xr8, $xr10 + xvfsub.d $xr13, $xr9, $xr11 + xvfmul.d $xr12, $xr12, $xr1 + xvfmul.d $xr13, $xr13, $xr1 + xvfmul.d $xr12, $xr12, $xr4 + xvfmul.d $xr13, $xr13, $xr4 + xvld $xr14, $t6, 0 + xvfcvt.s.d $xr12, $xr12, $xr13 + xvperm.w $xr12, $xr12, $xr2 + xvst $xr12, $t5, 0 + xvpermi.q $xr12, $xr14, 1 + vreplvei.w $vr13, $vr12, 3 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr15, $vr12, 2 fcvt.d.s $ft7, $ft7 + vextrins.d $vr15, $vr13, 16 + vreplvei.w $vr13, $vr12, 1 + fcvt.d.s $ft5, $ft5 vreplvei.w $vr12, $vr12, 0 - fcvt.d.s $ft9, $ft4 - vextrins.d $vr17, $vr15, 16 - xvpermi.q $xr17, $xr16, 2 - xvfsub.d $xr12, $xr2, $xr17 - xvfsub.d $xr15, $xr12, $xr8 - xvfmul.d $xr14, $xr14, $xr1 + fcvt.d.s $ft4, $ft4 + vextrins.d $vr12, $vr13, 16 + xvpermi.q $xr12, $xr15, 2 + xvfsub.d $xr13, $xr3, $xr12 + xvfsub.d $xr15, $xr13, $xr10 + vreplvei.w $vr16, $vr14, 3 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr17, $vr14, 2 + fcvt.d.s $ft9, $ft9 + vextrins.d $vr17, $vr16, 16 + vreplvei.w $vr16, $vr14, 1 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr14, $vr14, 0 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr16, 16 + xvpermi.q $xr14, $xr17, 2 + xvfsub.d $xr16, $xr3, $xr14 + xvfsub.d $xr17, $xr16, $xr11 xvfmul.d $xr15, $xr15, $xr1 - xvfmul.d $xr14, $xr14, $xr3 - xvfmul.d $xr15, $xr15, $xr3 - xvpickve.d $xr16, $xr14, 1 - fcvt.s.d $ft8, $ft8 - xvpickve.d $xr18, $xr14, 0 - fcvt.s.d $ft10, $ft10 - vextrins.w $vr18, $vr16, 16 - xvpickve.d $xr16, $xr14, 2 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr18, $vr16, 32 - xvpickve.d $xr14, $xr14, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr18, $vr14, 48 - xvpickve.d $xr14, $xr15, 1 - fcvt.s.d $ft6, $ft6 - xvpickve.d $xr16, $xr15, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr16, $vr14, 16 - xvpickve.d $xr14, $xr15, 2 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 32 - xvpickve.d $xr14, $xr15, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 48 - xvpermi.q $xr16, $xr18, 2 - xvst $xr16, $t7, 0 - xvfadd.d $xr9, $xr9, $xr13 - xvfadd.d $xr10, $xr10, $xr17 - xvfsub.d $xr9, $xr9, $xr7 - xvfsub.d $xr10, $xr10, $xr8 - xvfmul.d $xr9, $xr9, $xr3 - xvfmul.d $xr10, $xr10, $xr3 + xvfmul.d $xr17, $xr17, $xr1 + xvfmul.d $xr15, $xr15, $xr4 + xvfmul.d $xr17, $xr17, $xr4 + xvfcvt.s.d $xr15, $xr15, $xr17 + xvperm.w $xr15, $xr15, $xr2 + xvst $xr15, $t7, 0 + xvfadd.d $xr8, $xr8, $xr12 + xvfadd.d $xr9, $xr9, $xr14 + xvfsub.d $xr8, $xr8, $xr10 + xvfsub.d $xr9, $xr9, $xr11 + xvfmul.d $xr8, $xr8, $xr4 + xvfmul.d $xr9, $xr9, $xr4 + xvfmul.d $xr8, $xr8, $xr1 xvfmul.d $xr9, $xr9, $xr1 - xvfmul.d $xr10, $xr10, $xr1 - xvpickve.d $xr13, $xr9, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr9, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr9, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr9, $xr9, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr14, $vr9, 48 - xvpickve.d $xr9, $xr10, 1 - fcvt.s.d $ft1, $ft1 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr9, 16 - xvpickve.d $xr9, $xr10, 2 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 32 - xvpickve.d $xr9, $xr10, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t8, 0 - xvfadd.d $xr5, $xr11, $xr5 - xvfadd.d $xr6, $xr12, $xr6 - xvfsub.d $xr5, $xr5, $xr7 - xvfsub.d $xr6, $xr6, $xr8 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr6, $xr6, $xr3 - xvfmul.d $xr5, $xr5, $xr1 + xvfcvt.s.d $xr8, $xr8, $xr9 + xvperm.w $xr8, $xr8, $xr2 + xvst $xr8, $t8, 0 + xvfadd.d $xr6, $xr13, $xr6 + xvfadd.d $xr7, $xr16, $xr7 + xvfsub.d $xr6, $xr6, $xr10 + xvfsub.d $xr7, $xr7, $xr11 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr7, $xr7, $xr4 xvfmul.d $xr6, $xr6, $xr1 - xvpickve.d $xr7, $xr5, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr5, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr5, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr8, $vr5, 48 - xvpickve.d $xr5, $xr6, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr7, $xr6, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr5, 16 - xvpickve.d $xr5, $xr6, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 32 - xvpickve.d $xr5, $xr6, 3 - fcvt.s.d $fa5, $fa5 - xvld $xr6, $s3, 0 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $s0, 0 - xvfsub.s $xr5, $xr4, $xr6 - xvst $xr5, $s4, 0 - addi.d $s5, $s5, -8 - addi.d $s4, $s4, 32 + xvfmul.d $xr7, $xr7, $xr1 + xvld $xr8, $s2, 0 + xvfcvt.s.d $xr6, $xr6, $xr7 + xvperm.w $xr6, $xr6, $xr2 + xvst $xr6, $s0, 0 + xvfsub.s $xr6, $xr5, $xr8 + xvst $xr6, $s3, 0 + addi.d $s4, $s4, -8 addi.d $s3, $s3, 32 + addi.d $s2, $s2, 32 addi.d $s0, $s0, 32 addi.d $t8, $t8, 32 addi.d $t7, $t7, 32 @@ -7456,10 +7078,10 @@ Q__align_gapmap: # @Q__align_gapmap addi.d $t5, $t5, 32 addi.d $t4, $t4, 32 addi.d $t3, $t3, 32 - bnez $s5, .LBB6_108 + bnez $s4, .LBB6_108 # %bb.109: # %middle.block1033 - ld.d $s5, $sp, 120 # 8-byte Folded Reload ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bne $t1, $t2, .LBB6_32 b .LBB6_34 .LBB6_110: # %clearvec.exit.thread @@ -7502,7 +7124,7 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_115: # %clearvec.exit532 ld.d $ra, $sp, 408 # 8-byte Folded Reload ld.d $t4, $sp, 368 # 8-byte Folded Reload - xvld $xr19, $sp, 304 # 32-byte Folded Reload + xvld $xr18, $sp, 304 # 32-byte Folded Reload ld.d $a0, $sp, 160 # 8-byte Folded Reload move $t5, $s0 move $t6, $s1 @@ -7622,7 +7244,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.gapz_n1) ld.d $a3, $sp, 176 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) - ld.d $a4, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 96 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) ld.d $a5, $sp, 344 # 8-byte Folded Reload addi.d $a5, $a5, 1 @@ -7677,73 +7299,54 @@ Q__align_gapmap: # @Q__align_gapmap ori $a7, $zero, 1 move $a5, $a4 bstrins.d $a5, $a7, 2, 0 + pcalau12i $a7, %pc_hi20(.LCPI6_2) + xvld $xr0, $a7, %pc_lo12(.LCPI6_2) pcalau12i $a7, %pc_hi20(.LCPI6_1) - xvld $xr0, $a7, %pc_lo12(.LCPI6_1) - xvreplgr2vr.w $xr1, $a2 + xvld $xr1, $a7, %pc_lo12(.LCPI6_1) + xvreplgr2vr.w $xr2, $a2 addi.d $a7, $t5, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $t0, $a6 .p2align 4, , 16 .LBB6_132: # %vector.body1043 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a7, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a7, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a7, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a7, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $t0, $t0, -8 addi.d $a7, $a7, 32 @@ -7789,73 +7392,54 @@ Q__align_gapmap: # @Q__align_gapmap ori $t0, $zero, 1 move $a6, $a5 bstrins.d $a6, $t0, 2, 0 + pcalau12i $t0, %pc_hi20(.LCPI6_2) + xvld $xr0, $t0, %pc_lo12(.LCPI6_2) pcalau12i $t0, %pc_hi20(.LCPI6_1) - xvld $xr0, $t0, %pc_lo12(.LCPI6_1) - xvreplgr2vr.w $xr1, $a2 + xvld $xr1, $t0, %pc_lo12(.LCPI6_1) + xvreplgr2vr.w $xr2, $a2 addi.d $t0, $a3, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $t1, $a7 .p2align 4, , 16 .LBB6_139: # %vector.body1057 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $t0, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $t0, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $t0, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $t0, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $t1, $t1, -8 addi.d $t0, $t0, 32 @@ -8025,7 +7609,7 @@ Q__align_gapmap: # @Q__align_gapmap andi $a7, $a5, 12 move $a6, $a5 bstrins.d $a6, $zero, 3, 0 - xvreplve0.w $xr0, $xr19 + xvreplve0.w $xr0, $xr18 addi.d $t1, $a3, 36 xvrepli.b $xr1, 0 xvreplgr2vr.w $xr2, $a0 @@ -8062,7 +7646,7 @@ Q__align_gapmap: # @Q__align_gapmap ori $t1, $zero, 1 move $a7, $a5 bstrins.d $a7, $t1, 1, 0 - vreplvei.w $vr0, $vr19, 0 + vreplvei.w $vr0, $vr18, 0 addi.d $t1, $a3, 4 slli.d $t2, $a6, 2 sub.d $a6, $a6, $t0 @@ -8091,7 +7675,7 @@ Q__align_gapmap: # @Q__align_gapmap # =>This Inner Loop Header: Depth=1 fldx.s $fa1, $a6, $a5 stx.w $zero, $a3, $a5 - fmadd.s $fa1, $ft11, $fa0, $fa1 + fmadd.s $fa1, $ft10, $fa0, $fa1 fstx.s $fa1, $a2, $a5 addi.d $a4, $a4, -1 addi.d $a5, $a5, 4 @@ -8145,7 +7729,7 @@ Q__align_gapmap: # @Q__align_gapmap st.d $a3, $sp, 232 # 8-byte Folded Spill ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.mp) - ld.d $a4, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 96 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) ld.d $a5, $sp, 168 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.gapz_n2) @@ -8190,12 +7774,12 @@ Q__align_gapmap: # @Q__align_gapmap st.d $t3, $sp, 272 # 8-byte Folded Spill or $a1, $a1, $t3 st.d $a1, $sp, 168 # 8-byte Folded Spill - addi.d $s2, $a2, 4 + addi.d $s4, $a2, 4 addi.d $s0, $a3, 4 addi.d $fp, $a4, 4 addi.d $s1, $a6, 4 addi.d $s7, $a7, 4 - addi.d $s4, $t0, 4 + addi.d $s2, $t0, 4 addi.d $s6, $a5, 4 addi.d $s3, $t1, 4 movgr2fr.w $fs0, $zero @@ -8364,7 +7948,7 @@ Q__align_gapmap: # @Q__align_gapmap fldx.s $ft3, $s3, $a2 fmul.s $ft2, $fa7, $ft2 fadd.s $ft2, $ft1, $ft2 - fldx.s $ft4, $s4, $a2 + fldx.s $ft4, $s2, $a2 fmul.s $ft3, $fa2, $ft3 fadd.s $ft2, $ft2, $ft3 fldx.s $ft3, $s6, $a2 @@ -8385,7 +7969,7 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_189: # in Loop: Header=BB6_187 Depth=2 add.d $a7, $s5, $a2 fld.s $ft4, $a7, 0 - fldx.s $ft3, $s2, $a2 + fldx.s $ft3, $s4, $a2 fldx.s $ft2, $s1, $a2 fmul.s $ft4, $fa0, $ft4 fadd.s $ft4, $ft3, $ft4 @@ -8403,7 +7987,7 @@ Q__align_gapmap: # @Q__align_gapmap fcmp.cult.s $fcc0, $ft4, $ft3 bcnez $fcc0, .LBB6_186 # %bb.192: # in Loop: Header=BB6_187 Depth=2 - fstx.s $ft4, $s2, $a2 + fstx.s $ft4, $s4, $a2 stx.w $t2, $s0, $a2 b .LBB6_186 .LBB6_193: @@ -8416,9 +8000,9 @@ Q__align_gapmap: # @Q__align_gapmap move $t5, $t1 ld.d $s7, $sp, 152 # 8-byte Folded Reload ld.d $s6, $sp, 448 # 8-byte Folded Reload - ld.d $s5, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload ld.d $s4, $sp, 80 # 8-byte Folded Reload - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bnez $s0, .LBB6_209 .LBB6_195: # %.preheader555 ld.d $a0, $sp, 392 # 8-byte Folded Reload @@ -8441,20 +8025,30 @@ Q__align_gapmap: # @Q__align_gapmap move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI6_2) - xvld $xr1, $a5, %pc_lo12(.LCPI6_2) pcalau12i $a5, %pc_hi20(.LCPI6_3) - xvld $xr2, $a5, %pc_lo12(.LCPI6_3) + xvld $xr1, $a5, %pc_lo12(.LCPI6_3) + pcalau12i $a5, %pc_hi20(.LCPI6_4) + xvld $xr2, $a5, %pc_lo12(.LCPI6_4) + pcalau12i $a5, %pc_hi20(.LCPI6_1) + xvld $xr3, $a5, %pc_lo12(.LCPI6_1) ld.d $a5, $sp, 344 # 8-byte Folded Reload - xvreplgr2vr.d $xr3, $a5 + xvreplgr2vr.d $xr4, $a5 addi.d $a5, $t5, 4 - xvldi $xr4, -800 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_198: # %vector.body1179 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr3, $xr1 - xvsub.d $xr6, $xr3, $xr2 + xvsub.d $xr6, $xr4, $xr1 + xvsub.d $xr7, $xr4, $xr2 + xvpickve2gr.d $a7, $xr7, 0 + vinsgr2vr.w $vr8, $a7, 0 + xvpickve2gr.d $a7, $xr7, 1 + vinsgr2vr.w $vr8, $a7, 1 + xvpickve2gr.d $a7, $xr7, 2 + vinsgr2vr.w $vr8, $a7, 2 + xvpickve2gr.d $a7, $xr7, 3 + vinsgr2vr.w $vr8, $a7, 3 xvpickve2gr.d $a7, $xr6, 0 vinsgr2vr.w $vr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -8463,73 +8057,44 @@ Q__align_gapmap: # @Q__align_gapmap vinsgr2vr.w $vr7, $a7, 2 xvpickve2gr.d $a7, $xr6, 3 vinsgr2vr.w $vr7, $a7, 3 - xvpickve2gr.d $a7, $xr5, 0 - vinsgr2vr.w $vr6, $a7, 0 - xvpickve2gr.d $a7, $xr5, 1 - vinsgr2vr.w $vr6, $a7, 1 - xvpickve2gr.d $a7, $xr5, 2 - vinsgr2vr.w $vr6, $a7, 2 - xvpickve2gr.d $a7, $xr5, 3 - vinsgr2vr.w $vr6, $a7, 3 - xvpermi.q $xr7, $xr6, 2 - xvmul.w $xr5, $xr0, $xr7 - xvpermi.q $xr6, $xr5, 1 + xvpermi.q $xr8, $xr7, 2 + xvmul.w $xr6, $xr0, $xr8 + xvpermi.q $xr7, $xr6, 1 + vext2xv.d.w $xr7, $xr7 + xvffint.d.l $xr7, $xr7 vext2xv.d.w $xr6, $xr6 + xvld $xr8, $a5, 0 xvffint.d.l $xr6, $xr6 - vext2xv.d.w $xr5, $xr5 - xvld $xr7, $a5, 0 - xvffint.d.l $xr5, $xr5 - xvfmul.d $xr5, $xr5, $xr4 - xvfmul.d $xr6, $xr6, $xr4 - xvpermi.q $xr8, $xr7, 1 - vreplvei.w $vr9, $vr8, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr8, 2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvpermi.q $xr9, $xr8, 1 + vreplvei.w $vr10, $vr9, 3 fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr8, 1 + vreplvei.w $vr11, $vr9, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr9, 1 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr9, $vr9, 0 fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr10, 16 + xvpermi.q $xr9, $xr11, 2 + vreplvei.w $vr10, $vr8, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr8, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr8, 1 + fcvt.d.s $ft2, $ft2 vreplvei.w $vr8, $vr8, 0 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr9, 16 - xvpermi.q $xr8, $xr10, 2 - vreplvei.w $vr9, $vr7, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr7, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr7, 1 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr9, 16 - xvpermi.q $xr7, $xr10, 2 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr8, $xr11, 2 + xvfadd.d $xr7, $xr9, $xr7 xvfadd.d $xr6, $xr8, $xr6 - xvfadd.d $xr5, $xr7, $xr5 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $a5, 0 + xvfcvt.s.d $xr6, $xr7, $xr6 + xvperm.w $xr6, $xr6, $xr3 + xvst $xr6, $a5, 0 xvaddi.du $xr2, $xr2, 8 xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 @@ -8586,8 +8151,8 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - pcalau12i $a5, %pc_hi20(.LCPI6_4) - vld $vr3, $a5, %pc_lo12(.LCPI6_4) + pcalau12i $a5, %pc_hi20(.LCPI6_5) + vld $vr3, $a5, %pc_lo12(.LCPI6_5) xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 xvldi $xr5, -800 @@ -8612,18 +8177,9 @@ Q__align_gapmap: # @Q__align_gapmap vextrins.d $vr7, $vr8, 16 xvpermi.q $xr7, $xr9, 2 xvfmadd.d $xr6, $xr4, $xr6, $xr7 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - vst $vr8, $a5, 0 + xvpermi.q $xr7, $xr6, 1 + vfcvt.s.d $vr6, $vr7, $vr6 + vst $vr6, $a5, 0 vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 @@ -8655,7 +8211,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $s8, $a0, %pc_lo12(Q__align_gapmap.lastverticalw) ld.d $s4, $s5, %pc_lo12(Q__align_gapmap.mseq1) - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload move $fp, $s5 ld.d $s5, $a0, %pc_lo12(Q__align_gapmap.mseq2) ld.d $a0, $sp, 72 # 8-byte Folded Reload @@ -8759,8 +8315,8 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $zero b .LBB6_228 .LBB6_225: # %vector.ph1210 - pcalau12i $a5, %pc_hi20(.LCPI6_5) - vld $vr0, $a5, %pc_lo12(.LCPI6_5) + pcalau12i $a5, %pc_hi20(.LCPI6_6) + vld $vr0, $a5, %pc_lo12(.LCPI6_6) bstrpick.d $a4, $a4, 31, 2 slli.d $a4, $a4, 2 addi.d $a5, $s3, 16 @@ -8821,8 +8377,8 @@ Q__align_gapmap: # @Q__align_gapmap andi $a6, $a5, 12 bstrpick.d $a4, $a5, 31, 4 slli.d $a4, $a4, 4 - pcalau12i $a7, %pc_hi20(.LCPI6_6) - xvld $xr0, $a7, %pc_lo12(.LCPI6_6) + pcalau12i $a7, %pc_hi20(.LCPI6_7) + xvld $xr0, $a7, %pc_lo12(.LCPI6_7) addi.d $a7, $a2, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -8844,8 +8400,8 @@ Q__align_gapmap: # @Q__align_gapmap beqz $a6, .LBB6_242 .LBB6_239: # %vec.epilog.ph1244 move $a6, $a4 - pcalau12i $a4, %pc_hi20(.LCPI6_7) - vld $vr0, $a4, %pc_lo12(.LCPI6_7) + pcalau12i $a4, %pc_hi20(.LCPI6_8) + vld $vr0, $a4, %pc_lo12(.LCPI6_8) bstrpick.d $a4, $a5, 31, 2 slli.d $a4, $a4, 2 vreplgr2vr.w $vr1, $a6 @@ -9145,7 +8701,7 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_283: ld.d $s7, $sp, 152 # 8-byte Folded Reload .LBB6_284: # %Atracking_localhom_gapmap.exit - ld.d $fp, $sp, 120 # 8-byte Folded Reload + ld.d $fp, $sp, 112 # 8-byte Folded Reload .LBB6_285: # %Atracking_localhom_gapmap.exit ld.d $a0, $s4, 0 pcaddu18i $ra, %call36(strlen) @@ -9175,7 +8731,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a2, $sp, 408 # 8-byte Folded Reload blez $a2, .LBB6_293 # %bb.291: # %.lr.ph640 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(Q__align_gapmap.mseq2) .p2align 4, , 16 .LBB6_292: # =>This Inner Loop Header: Depth=1 @@ -9227,10 +8783,10 @@ Q__align_gapmap: # @Q__align_gapmap addi.d $a7, $a6, 1 b .LBB6_171 .LBB6_297: - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload b .LBB6_27 .LBB6_298: - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload b .LBB6_32 .Lfunc_end6: .size Q__align_gapmap, .Lfunc_end6-Q__align_gapmap diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s index 9b57da9..d9af83f 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s @@ -445,16 +445,25 @@ imp_match_init_strictR: # @imp_match_init_strictR .word 7 # 0x7 .word 8 # 0x8 .LCPI2_2: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI2_3: .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 .dword 8 # 0x8 -.LCPI2_3: +.LCPI2_4: .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI2_6: +.LCPI2_7: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -465,15 +474,15 @@ imp_match_init_strictR: # @imp_match_init_strictR .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI2_4: +.LCPI2_5: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI2_5: +.LCPI2_6: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI2_7: +.LCPI2_8: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -500,25 +509,23 @@ R__align: # @R__align fst.d $fs1, $sp, 488 # 8-byte Folded Spill fst.d $fs2, $sp, 480 # 8-byte Folded Spill st.d $a7, $sp, 56 # 8-byte Folded Spill - st.d $a6, $sp, 16 # 8-byte Folded Spill - move $s8, $a5 - st.d $a4, $sp, 120 # 8-byte Folded Spill - st.d $a3, $sp, 312 # 8-byte Folded Spill - move $s1, $a2 - move $s7, $a1 + st.d $a6, $sp, 24 # 8-byte Folded Spill + move $s6, $a5 + st.d $a4, $sp, 112 # 8-byte Folded Spill + st.d $a3, $sp, 472 # 8-byte Folded Spill + st.d $a2, $sp, 304 # 8-byte Folded Spill + move $s1, $a1 move $s3, $a0 pcalau12i $a0, %got_pc_hi20(penalty) ld.d $a0, $a0, %got_pc_lo12(penalty) - pcalau12i $a1, %pc_hi20(R__align.orlgth1) - st.d $a1, $sp, 432 # 8-byte Folded Spill - ld.w $fp, $a1, %pc_lo12(R__align.orlgth1) - ld.w $a0, $a0, 0 - st.d $a0, $sp, 304 # 8-byte Folded Spill + pcalau12i $fp, %pc_hi20(R__align.orlgth1) + ld.w $s7, $fp, %pc_lo12(R__align.orlgth1) + ld.w $s8, $a0, 0 pcalau12i $a0, %pc_hi20(R__align.mseq1) st.d $a0, $sp, 104 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.mseq2) st.d $a0, $sp, 80 # 8-byte Folded Spill - bnez $fp, .LBB2_2 + bnez $s7, .LBB2_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) ld.d $s0, $a0, %got_pc_lo12(njob) @@ -533,35 +540,34 @@ R__align: # @R__align move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 432 # 8-byte Folded Reload - ld.w $fp, $a1, %pc_lo12(R__align.orlgth1) + ld.w $s7, $fp, %pc_lo12(R__align.orlgth1) ld.d $a1, $sp, 80 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.mseq2) .LBB2_2: ld.d $a0, $s3, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - ld.d $a1, $s7, 0 + ld.d $a1, $s1, 0 st.d $a0, $sp, 72 # 8-byte Folded Spill addi.w $s5, $a0, 0 move $a0, $a1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 pcalau12i $a1, %pc_hi20(R__align.orlgth2) - st.d $a1, $sp, 248 # 8-byte Folded Spill + st.d $a1, $sp, 432 # 8-byte Folded Spill ld.w $s0, $a1, %pc_lo12(R__align.orlgth2) st.d $a0, $sp, 216 # 8-byte Folded Spill - addi.w $a5, $a0, 0 + addi.w $a1, $a0, 0 pcalau12i $a0, %pc_hi20(R__align.w1) - st.d $a0, $sp, 472 # 8-byte Folded Spill + st.d $a0, $sp, 400 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.w2) - st.d $a0, $sp, 408 # 8-byte Folded Spill + st.d $a0, $sp, 392 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.initverticalw) - st.d $a0, $sp, 400 # 8-byte Folded Spill + st.d $a0, $sp, 384 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.lastverticalw) st.d $a0, $sp, 96 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.m) - st.d $a0, $sp, 416 # 8-byte Folded Spill + st.d $a0, $sp, 408 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.mp) st.d $a0, $sp, 344 # 8-byte Folded Spill pcalau12i $s2, %pc_hi20(R__align.mseq) @@ -578,9 +584,9 @@ R__align: # @R__align pcalau12i $a0, %pc_hi20(R__align.gapz2) st.d $a0, $sp, 256 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.gapf1) - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 312 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.gapf2) - st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a0, $sp, 360 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.ogcp1g) st.d $a0, $sp, 352 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.ogcp2g) @@ -589,47 +595,48 @@ R__align: # @R__align st.d $a0, $sp, 328 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.fgcp2g) st.d $a0, $sp, 320 # 8-byte Folded Spill - pcalau12i $s6, %pc_hi20(R__align.cpmx1) - pcalau12i $a0, %pc_hi20(R__align.cpmx2) + pcalau12i $a0, %pc_hi20(R__align.cpmx1) st.d $a0, $sp, 424 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(R__align.cpmx2) + st.d $a0, $sp, 416 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.floatwork) - st.d $a0, $sp, 384 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(R__align.intwork) st.d $a0, $sp, 376 # 8-byte Folded Spill - st.d $s8, $sp, 64 # 8-byte Folded Spill - st.d $s7, $sp, 128 # 8-byte Folded Spill - st.d $s3, $sp, 112 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(R__align.intwork) + st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $s6, $sp, 64 # 8-byte Folded Spill + st.d $s1, $sp, 120 # 8-byte Folded Spill + st.d $s3, $sp, 128 # 8-byte Folded Spill st.d $s5, $sp, 88 # 8-byte Folded Spill - st.d $a5, $sp, 464 # 8-byte Folded Spill - st.d $s6, $sp, 392 # 8-byte Folded Spill - blt $fp, $s5, .LBB2_4 + st.d $a1, $sp, 464 # 8-byte Folded Spill + blt $s7, $s5, .LBB2_4 # %bb.3: - bge $s0, $a5, .LBB2_9 + bge $s0, $a1, .LBB2_9 .LBB2_4: pcalau12i $s6, %pc_hi20(R__align.match) pcalau12i $a0, %pc_hi20(R__align.ogcp1) st.d $a0, $sp, 240 # 8-byte Folded Spill - pcalau12i $s7, %pc_hi20(R__align.ogcp2) - pcalau12i $s4, %pc_hi20(R__align.fgcp1) - pcalau12i $s3, %pc_hi20(R__align.fgcp2) - st.d $s1, $sp, 232 # 8-byte Folded Spill - blez $fp, .LBB2_7 + pcalau12i $a0, %pc_hi20(R__align.ogcp2) + st.d $a0, $sp, 248 # 8-byte Folded Spill + pcalau12i $s3, %pc_hi20(R__align.fgcp1) + pcalau12i $s1, %pc_hi20(R__align.fgcp2) + st.d $s8, $sp, 232 # 8-byte Folded Spill + blez $s7, .LBB2_7 # %bb.5: ld.d $s5, $sp, 216 # 8-byte Folded Reload blez $s0, .LBB2_8 # %bb.6: - ld.d $a0, $sp, 472 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.w1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.w2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $s6, %pc_lo12(R__align.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -637,7 +644,7 @@ R__align: # @R__align ld.d $a0, $a0, %pc_lo12(R__align.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.m) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -672,11 +679,11 @@ R__align: # @R__align ld.d $a0, $a0, %pc_lo12(R__align.gapz2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -684,13 +691,14 @@ R__align: # @R__align ld.d $a0, $a0, %pc_lo12(R__align.ogcp1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $s7, %pc_lo12(R__align.ogcp2) + ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(R__align.ogcp2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $s4, %pc_lo12(R__align.fgcp1) + ld.d $a0, $s3, %pc_lo12(R__align.fgcp1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $s3, %pc_lo12(R__align.fgcp2) + ld.d $a0, $s1, %pc_lo12(R__align.fgcp2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $sp, 352 # 8-byte Folded Reload @@ -709,25 +717,24 @@ R__align: # @R__align ld.d $a0, $a0, %pc_lo12(R__align.fgcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 424 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.cpmx1) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 + ld.w $s7, $fp, %pc_lo12(R__align.orlgth1) ld.d $a0, $sp, 432 # 8-byte Folded Reload - ld.w $fp, $a0, %pc_lo12(R__align.orlgth1) - ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.w $s0, $a0, %pc_lo12(R__align.orlgth2) b .LBB2_8 .LBB2_7: @@ -741,12 +748,12 @@ R__align: # @R__align fmul.d $fa1, $fa1, $fa0 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a0, $fa1 - slt $a1, $a0, $fp + slt $a1, $a0, $s7 masknez $a0, $a0, $a1 - maskeqz $a1, $fp, $a1 - or $fp, $a1, $a0 - st.d $s3, $sp, 224 # 8-byte Folded Spill - addi.w $s3, $fp, 100 + maskeqz $a1, $s7, $a1 + or $s7, $a1, $a0 + move $s4, $s3 + addi.w $s3, $s7, 100 movgr2fr.w $fa1, $s5 ffint.d.w $fa1, $fa1 fmul.d $fa0, $fa1, $fa0 @@ -761,22 +768,22 @@ R__align: # @R__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 472 # 8-byte Folded Reload + ld.d $a1, $sp, 400 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.w1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 408 # 8-byte Folded Reload + ld.d $a1, $sp, 392 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.w2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 st.d $a0, $s6, %pc_lo12(R__align.match) - addi.w $s6, $fp, 102 + addi.w $s6, $s7, 102 move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 400 # 8-byte Folded Reload + ld.d $a1, $sp, 384 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.initverticalw) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -786,7 +793,7 @@ R__align: # @R__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 416 # 8-byte Folded Reload + ld.d $a1, $sp, 408 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.m) move $a0, $s5 pcaddu18i $ra, %call36(AllocateIntVec) @@ -833,12 +840,12 @@ R__align: # @R__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 312 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.gapf1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 368 # 8-byte Folded Reload + ld.d $a1, $sp, 360 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.gapf2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -848,8 +855,8 @@ R__align: # @R__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $s7, %pc_lo12(R__align.ogcp2) - ld.d $s1, $sp, 432 # 8-byte Folded Reload + ld.d $a1, $sp, 248 # 8-byte Folded Reload + st.d $a0, $a1, %pc_lo12(R__align.ogcp2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -857,8 +864,7 @@ R__align: # @R__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 224 # 8-byte Folded Reload - st.d $a0, $a1, %pc_lo12(R__align.fgcp2) + st.d $a0, $s1, %pc_lo12(R__align.fgcp2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -883,13 +889,13 @@ R__align: # @R__align move $a1, $s6 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $s6, $sp, 392 # 8-byte Folded Reload - st.d $a0, $s6, %pc_lo12(R__align.cpmx1) + ld.d $a1, $sp, 424 # 8-byte Folded Reload + st.d $a0, $a1, %pc_lo12(R__align.cpmx1) ori $a0, $zero, 26 move $a1, $s5 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 424 # 8-byte Folded Reload + ld.d $a1, $sp, 416 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.cpmx2) slt $a0, $s8, $s3 masknez $a1, $s8, $a0 @@ -900,31 +906,30 @@ R__align: # @R__align move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 384 # 8-byte Folded Reload + ld.d $a1, $sp, 376 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.floatwork) ori $a1, $zero, 27 move $a0, $s3 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.intwork) - st.w $fp, $s1, %pc_lo12(R__align.orlgth1) - ld.d $a0, $sp, 248 # 8-byte Folded Reload + st.w $s7, $fp, %pc_lo12(R__align.orlgth1) + ld.d $a0, $sp, 432 # 8-byte Folded Reload st.w $s0, $a0, %pc_lo12(R__align.orlgth2) - ld.d $s8, $sp, 64 # 8-byte Folded Reload - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s3, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 64 # 8-byte Folded Reload + ld.d $s1, $sp, 120 # 8-byte Folded Reload + ld.d $s3, $sp, 128 # 8-byte Folded Reload ld.d $s5, $sp, 88 # 8-byte Folded Reload - ld.d $a5, $sp, 464 # 8-byte Folded Reload - ld.d $s1, $sp, 232 # 8-byte Folded Reload + ld.d $s8, $sp, 232 # 8-byte Folded Reload .LBB2_9: - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload blez $a0, .LBB2_12 # %bb.10: # %.lr.ph ld.d $a0, $s2, %pc_lo12(R__align.mseq) ld.d $a1, $sp, 104 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(R__align.mseq1) - ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $a2, $sp, 112 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_11: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a0, 0 @@ -937,15 +942,16 @@ R__align: # @R__align addi.d $a0, $a0, 8 bnez $a2, .LBB2_11 .LBB2_12: # %.preheader580 - blez $s8, .LBB2_15 + ld.d $a5, $sp, 464 # 8-byte Folded Reload + blez $s6, .LBB2_15 # %bb.13: # %.lr.ph592 ld.d $a1, $s2, %pc_lo12(R__align.mseq) ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.mseq2) - ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $a2, $sp, 112 # 8-byte Folded Reload alsl.d $a1, $a2, $a1, 3 - move $a2, $s8 - move $a3, $s7 + move $a2, $s6 + move $a3, $s1 .p2align 4, , 16 .LBB2_14: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a1, 0 @@ -959,15 +965,14 @@ R__align: # @R__align bnez $a2, .LBB2_14 .LBB2_15: # %._crit_edge ld.d $s4, $sp, 600 - ld.d $a0, $sp, 304 # 8-byte Folded Reload - movgr2fr.w $fs0, $a0 + movgr2fr.w $fs0, $s8 pcalau12i $a0, %got_pc_hi20(commonAlloc1) ld.d $s2, $a0, %got_pc_lo12(commonAlloc1) ld.w $a0, $s2, 0 pcalau12i $a1, %got_pc_hi20(commonAlloc2) ld.d $s3, $a1, %got_pc_lo12(commonAlloc2) ld.w $a1, $s3, 0 - blt $a0, $fp, .LBB2_18 + blt $a0, $s7, .LBB2_18 # %bb.16: # %._crit_edge blt $a1, $s0, .LBB2_18 # %bb.17: # %._crit_edge725 @@ -985,16 +990,15 @@ R__align: # @R__align ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 432 # 8-byte Folded Reload - ld.w $fp, $a0, %pc_lo12(R__align.orlgth1) + ld.w $s7, $fp, %pc_lo12(R__align.orlgth1) ld.w $a0, $s2, 0 - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 432 # 8-byte Folded Reload ld.w $s0, $a1, %pc_lo12(R__align.orlgth2) ld.w $a1, $s3, 0 .LBB2_21: - slt $a2, $a0, $fp + slt $a2, $a0, $s7 masknez $a0, $a0, $a2 - maskeqz $a2, $fp, $a2 + maskeqz $a2, $s7, $a2 or $fp, $a2, $a0 slt $a0, $a1, $s0 masknez $a1, $a1, $a0 @@ -1010,30 +1014,30 @@ R__align: # @R__align st.w $fp, $s2, 0 st.w $s0, $s3, 0 .LBB2_22: - ld.d $a1, $s6, %pc_lo12(R__align.cpmx1) + ld.d $a1, $sp, 424 # 8-byte Folded Reload + ld.d $a1, $a1, %pc_lo12(R__align.cpmx1) ffint.s.w $fa0, $fs0 xvst $xr0, $sp, 432 # 32-byte Folded Spill pcalau12i $a2, %pc_hi20(R__align.ijp) st.d $a2, $sp, 48 # 8-byte Folded Spill st.d $a0, $a2, %pc_lo12(R__align.ijp) - ld.d $s0, $sp, 112 # 8-byte Folded Reload + ld.d $s0, $sp, 128 # 8-byte Folded Reload move $a0, $s0 - move $a2, $s1 - ld.d $s6, $sp, 88 # 8-byte Folded Reload - move $a3, $s6 - ld.d $fp, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 304 # 8-byte Folded Reload + move $a2, $s5 + ld.d $s8, $sp, 88 # 8-byte Folded Reload + move $a3, $s8 + ld.d $fp, $sp, 112 # 8-byte Folded Reload move $a4, $fp pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(R__align.cpmx2) - move $a0, $s7 - move $s5, $s1 - ld.d $s1, $sp, 312 # 8-byte Folded Reload - move $a2, $s1 + move $a0, $s1 + ld.d $a2, $sp, 472 # 8-byte Folded Reload ld.d $s2, $sp, 464 # 8-byte Folded Reload move $a3, $s2 - move $a4, $s8 + move $a4, $s6 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 ld.d $a0, $sp, 352 # 8-byte Folded Reload @@ -1043,14 +1047,15 @@ R__align: # @R__align move $a1, $fp move $a2, $s0 move $a3, $s5 - move $a4, $s6 + move $a4, $s8 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp2g) - move $a1, $s8 - move $a2, $s7 - move $a3, $s1 + move $a1, $s6 + move $a2, $s1 + ld.d $s4, $sp, 472 # 8-byte Folded Reload + move $a3, $s4 move $a4, $s2 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 @@ -1059,14 +1064,14 @@ R__align: # @R__align move $a1, $fp move $a2, $s0 move $a3, $s5 - move $a4, $s6 + move $a4, $s8 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp2g) - move $a1, $s8 - move $a2, $s7 - move $a3, $s1 + move $a1, $s6 + move $a2, $s1 + move $a3, $s4 move $a4, $s2 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 @@ -1075,14 +1080,14 @@ R__align: # @R__align move $a1, $fp move $a2, $s0 move $a3, $s5 - move $a4, $s6 + move $a4, $s8 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf2) - move $a1, $s8 - move $a2, $s7 - move $a3, $s1 + move $a1, $s6 + move $a2, $s1 + move $a3, $s4 move $a4, $s2 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 @@ -1091,30 +1096,30 @@ R__align: # @R__align move $a1, $fp move $a2, $s0 move $a3, $s5 - move $a4, $s6 + move $a4, $s8 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf2) - move $a1, $s8 - move $a2, $s7 - move $a3, $s1 + move $a1, $s6 + move $a2, $s1 + move $a3, $s4 move $a4, $s2 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf1) move $a1, $fp move $a2, $s0 move $a3, $s5 - move $a4, $s6 + move $a4, $s8 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf2) - move $a1, $s8 - move $a2, $s7 - move $a3, $s1 + move $a1, $s6 + move $a2, $s1 + move $a3, $s4 move $a4, $s2 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 @@ -1123,207 +1128,200 @@ R__align: # @R__align move $a1, $fp move $a2, $s0 move $a3, $s5 - move $a4, $s6 + move $a4, $s8 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz2) - move $a1, $s8 - move $a2, $s7 - move $a3, $s1 + move $a1, $s6 + move $a2, $s1 + move $a3, $s4 + move $s3, $s2 move $a4, $s2 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 b .LBB2_25 .LBB2_24: - ld.d $a1, $sp, 624 - st.d $a1, $sp, 304 # 8-byte Folded Spill + ld.d $s2, $sp, 624 ld.d $s3, $sp, 616 ld.d $s7, $sp, 608 move $a1, $fp - move $a2, $s0 + ld.d $a2, $sp, 128 # 8-byte Folded Reload move $a3, $s5 - move $a4, $s6 - move $s0, $s4 + move $a4, $s8 move $a5, $s4 - st.d $s4, $sp, 224 # 8-byte Folded Spill move $a6, $s3 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp2g) - move $a1, $s8 - ld.d $a2, $sp, 128 # 8-byte Folded Reload - move $s4, $s1 - move $a3, $s1 - move $a4, $s2 - move $s1, $s7 - st.d $s7, $sp, 240 # 8-byte Folded Spill + move $a1, $s6 + ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $s0, $sp, 472 # 8-byte Folded Reload + move $a3, $s0 + ld.d $a4, $sp, 464 # 8-byte Folded Reload move $a5, $s7 - ld.d $s7, $sp, 304 # 8-byte Folded Reload - move $a6, $s7 + move $a6, $s2 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp1g) move $a1, $fp - ld.d $fp, $sp, 112 # 8-byte Folded Reload - move $a2, $fp + ld.d $a2, $sp, 128 # 8-byte Folded Reload move $a3, $s5 - move $a4, $s6 - move $a5, $s0 + move $a4, $s8 + move $a5, $s4 move $a6, $s3 - st.d $s3, $sp, 248 # 8-byte Folded Spill pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp2g) - move $a1, $s8 - ld.d $a2, $sp, 128 # 8-byte Folded Reload - move $a3, $s4 - move $a4, $s2 - move $a5, $s1 - move $a6, $s7 + move $a1, $s6 + ld.d $a2, $sp, 120 # 8-byte Folded Reload + move $a3, $s0 + ld.d $a4, $sp, 464 # 8-byte Folded Reload + move $a5, $s7 + move $a6, $s2 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf1) - ld.d $s0, $sp, 120 # 8-byte Folded Reload - move $a1, $s0 - move $a2, $fp + move $a1, $fp + ld.d $a2, $sp, 128 # 8-byte Folded Reload move $a3, $s5 - move $a4, $s6 - ld.d $s4, $sp, 224 # 8-byte Folded Reload + move $a4, $s8 move $a5, $s4 move $a6, $s3 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf2) - move $a1, $s8 - ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $s1, $sp, 312 # 8-byte Folded Reload - move $a3, $s1 - move $a4, $s2 - ld.d $s3, $sp, 240 # 8-byte Folded Reload - move $a5, $s3 - move $a6, $s7 + move $a1, $s6 + ld.d $a2, $sp, 120 # 8-byte Folded Reload + move $a3, $s0 + ld.d $a4, $sp, 464 # 8-byte Folded Reload + move $a5, $s7 + move $a6, $s2 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 ld.d $a0, $sp, 288 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf1) - move $a1, $s0 - move $a2, $fp + move $a1, $fp + ld.d $a2, $sp, 128 # 8-byte Folded Reload move $a3, $s5 - move $a4, $s6 + move $a4, $s8 move $a5, $s4 - ld.d $a6, $sp, 248 # 8-byte Folded Reload + move $a6, $s3 + move $s3, $s0 + ld.d $s1, $sp, 464 # 8-byte Folded Reload + ld.d $s0, $sp, 128 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf2) - move $a1, $s8 - ld.d $a2, $sp, 128 # 8-byte Folded Reload - move $a3, $s1 - move $a4, $s2 - move $a5, $s3 - ld.d $a6, $sp, 304 # 8-byte Folded Reload - ld.d $s3, $sp, 128 # 8-byte Folded Reload + move $a1, $s6 + ld.d $a2, $sp, 120 # 8-byte Folded Reload + move $a3, $s3 + move $a4, $s1 + move $a5, $s7 + move $a6, $s2 + ld.d $s2, $sp, 120 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf1) - move $a1, $s0 - move $a2, $fp + move $a1, $fp + move $a2, $s0 move $a3, $s5 - move $a4, $s6 + move $a4, $s8 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf2) - move $a1, $s8 - move $a2, $s3 - move $a3, $s1 - move $a4, $s2 + move $a1, $s6 + move $a2, $s2 + move $a3, $s3 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz1) - move $a1, $s0 - move $a2, $fp + move $a1, $fp + move $a2, $s0 move $a3, $s5 - move $a4, $s6 + move $a4, $s8 move $a5, $s4 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz2) - move $a1, $s8 - move $a2, $s3 - move $a3, $s1 - move $a4, $s2 + move $a1, $s6 + move $a2, $s2 + move $a3, $s3 + move $s3, $s1 + move $a4, $s1 move $a5, $s4 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 .LBB2_25: - ld.d $a0, $sp, 472 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $s5, $a0, %pc_lo12(R__align.w1) - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.w2) st.d $a0, $sp, 472 # 8-byte Folded Spill - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.initverticalw) + ld.d $a1, $sp, 416 # 8-byte Folded Reload + ld.d $s4, $a1, %pc_lo12(R__align.cpmx2) ld.d $a1, $sp, 424 # 8-byte Folded Reload - ld.d $s8, $a1, %pc_lo12(R__align.cpmx2) - ld.d $a1, $sp, 392 # 8-byte Folded Reload - ld.d $s7, $a1, %pc_lo12(R__align.cpmx1) - ld.d $a1, $sp, 384 # 8-byte Folded Reload - ld.d $s1, $a1, %pc_lo12(R__align.floatwork) + ld.d $fp, $a1, %pc_lo12(R__align.cpmx1) ld.d $a1, $sp, 376 # 8-byte Folded Reload - ld.d $s4, $a1, %pc_lo12(R__align.intwork) + ld.d $s1, $a1, %pc_lo12(R__align.floatwork) + ld.d $a1, $sp, 368 # 8-byte Folded Reload + ld.d $s2, $a1, %pc_lo12(R__align.intwork) ori $a7, $zero, 1 st.d $a0, $sp, 424 # 8-byte Folded Spill - move $a1, $s8 - move $a2, $s7 + move $a1, $s4 + move $a2, $fp move $a3, $zero - move $a4, $s6 + move $a4, $s8 move $a5, $s1 - move $a6, $s4 + move $a6, $s2 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(impmtx) st.d $a0, $sp, 176 # 8-byte Folded Spill - ld.d $s2, $sp, 216 # 8-byte Folded Reload - slli.d $a1, $s2, 32 - addi.d $a0, $s2, 1 + ld.d $s6, $sp, 216 # 8-byte Folded Reload + slli.d $a1, $s6, 32 + addi.d $a0, $s6, 1 st.d $a0, $sp, 8 # 8-byte Folded Spill lu12i.w $s0, 287172 xvld $xr0, $sp, 432 # 32-byte Folded Reload fcvt.d.s $fs1, $fa0 ld.d $a0, $sp, 56 # 8-byte Folded Reload - move $s3, $a0 + move $s7, $a0 + st.d $fp, $sp, 416 # 8-byte Folded Spill st.d $a1, $sp, 248 # 8-byte Folded Spill bnez $a0, .LBB2_27 # %bb.26: # %.critedge ori $a7, $zero, 1 move $a0, $s5 - move $a1, $s7 - move $a2, $s8 + move $a1, $fp + move $a2, $s4 move $a3, $zero - ld.d $fp, $sp, 464 # 8-byte Folded Reload - move $a4, $fp + move $a4, $s3 move $a5, $s1 - move $a6, $s4 + move $a6, $s2 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - move $t3, $s0 - move $t1, $fp + move $t3, $s6 + move $t2, $s0 ld.d $t7, $sp, 424 # 8-byte Folded Reload - move $s0, $s3 + move $s0, $s7 + ld.d $s6, $sp, 120 # 8-byte Folded Reload b .LBB2_47 .LBB2_27: - ld.d $a4, $sp, 464 # 8-byte Folded Reload - blez $s6, .LBB2_30 + blez $s8, .LBB2_30 # %bb.28: # %.lr.ph.i ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) @@ -1345,55 +1343,60 @@ R__align: # @R__align ori $a7, $zero, 1 ori $fp, $zero, 1 move $a0, $s5 - move $a1, $s7 - move $a2, $s8 + ld.d $a1, $sp, 416 # 8-byte Folded Reload + move $a2, $s4 move $a3, $zero + move $a4, $s3 move $a5, $s1 - move $a6, $s4 + move $a6, $s2 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - ld.d $t1, $sp, 464 # 8-byte Folded Reload - bnez $t1, .LBB2_32 + bnez $s3, .LBB2_32 # %bb.31: # %imp_match_out_veadR.exit.thread pcalau12i $a0, %got_pc_hi20(outgap) ld.d $a0, $a0, %got_pc_lo12(outgap) ld.w $a0, $a0, 0 ld.d $t7, $sp, 424 # 8-byte Folded Reload - move $t3, $s0 - move $s0, $s3 + move $t2, $s0 + move $s0, $s7 + move $t3, $s6 + ld.d $s6, $sp, 120 # 8-byte Folded Reload beq $a0, $fp, .LBB2_48 b .LBB2_60 .LBB2_32: # %iter.check ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) ld.d $a1, $a0, 0 - bstrpick.d $a0, $s2, 31, 0 + move $t3, $s6 + bstrpick.d $a0, $s6, 31, 0 ori $a2, $zero, 4 ld.d $t7, $sp, 424 # 8-byte Folded Reload - move $t3, $s0 + move $t2, $s0 bgeu $a0, $a2, .LBB2_34 # %bb.33: move $a2, $a1 - move $a3, $s2 + move $a3, $t3 move $a4, $s5 - move $s0, $s3 + move $s0, $s7 + ld.d $s6, $sp, 120 # 8-byte Folded Reload b .LBB2_46 .LBB2_34: # %vector.memcheck addi.w $a2, $zero, -4 lu32i.d $a2, 3 - alsl.d $a2, $s2, $a2, 2 + alsl.d $a2, $t3, $a2, 2 bstrpick.d $a2, $a2, 33, 2 slli.d $a2, $a2, 2 addi.d $a2, $a2, 4 add.d $a3, $a1, $a2 - move $s0, $s3 + move $s0, $s7 + ld.d $s6, $sp, 120 # 8-byte Folded Reload bgeu $s5, $a3, .LBB2_37 # %bb.35: # %vector.memcheck add.d $a2, $s5, $a2 bgeu $a1, $a2, .LBB2_37 # %bb.36: move $a2, $a1 - move $a3, $s2 + move $a3, $t3 move $a4, $s5 b .LBB2_46 .LBB2_37: # %vector.main.loop.iter.check @@ -1403,8 +1406,8 @@ R__align: # @R__align move $a5, $zero b .LBB2_43 .LBB2_39: # %vector.ph - andi $a2, $s2, 12 - bstrpick.d $a3, $s2, 31, 4 + andi $a2, $t3, 12 + bstrpick.d $a3, $t3, 31, 4 slli.d $a5, $a3, 4 addi.d $a3, $s5, 32 addi.d $a4, $a1, 32 @@ -1429,10 +1432,10 @@ R__align: # @R__align # %bb.42: # %vec.epilog.iter.check beqz $a2, .LBB2_300 .LBB2_43: # %vec.epilog.ph - bstrpick.d $a4, $s2, 31, 2 + bstrpick.d $a4, $t3, 31, 2 slli.d $a6, $a4, 2 alsl.d $a2, $a4, $a1, 4 - andi $a3, $s2, 3 + andi $a3, $t3, 3 alsl.d $a4, $a4, $s5, 4 alsl.d $a7, $a5, $s5, 2 alsl.d $a1, $a5, $a1, 2 @@ -1600,20 +1603,19 @@ R__align: # @R__align addi.d $t0, $t0, 4 bnez $a7, .LBB2_50 .LBB2_51: # %.preheader575 - bgtz $t1, .LBB2_70 + bgtz $s3, .LBB2_70 # %bb.52: # %.loopexit576.thread - move $a4, $t1 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $t4, $a0, %pc_lo12(R__align.m) st.w $zero, $t4, 0 b .LBB2_68 .LBB2_53: # %.preheader579 - blez $t1, .LBB2_60 + blez $s3, .LBB2_60 # %bb.54: # %.lr.ph595 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - addi.d $a1, $s2, 1 + addi.d $a1, $t3, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a4, $zero, 8 @@ -1627,71 +1629,52 @@ R__align: # @R__align bstrins.d $a3, $a5, 2, 0 pcalau12i $a5, %pc_hi20(.LCPI2_1) xvld $xr0, $a5, %pc_lo12(.LCPI2_1) - xvreplgr2vr.w $xr1, $a0 + pcalau12i $a5, %pc_hi20(.LCPI2_2) + xvld $xr1, $a5, %pc_lo12(.LCPI2_2) + xvreplgr2vr.w $xr2, $a0 addi.d $a5, $s5, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_56: # %vector.body826 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -1740,71 +1723,52 @@ R__align: # @R__align bstrins.d $a3, $a5, 2, 0 pcalau12i $a5, %pc_hi20(.LCPI2_1) xvld $xr0, $a5, %pc_lo12(.LCPI2_1) - xvreplgr2vr.w $xr1, $a0 + pcalau12i $a5, %pc_hi20(.LCPI2_2) + xvld $xr1, $a5, %pc_lo12(.LCPI2_2) + xvreplgr2vr.w $xr2, $a0 addi.d $a5, $t7, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_63: # %vector.body841 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -1832,20 +1796,19 @@ R__align: # @R__align addi.d $a4, $a4, 4 bnez $a1, .LBB2_66 .LBB2_67: # %.loopexit576 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $t4, $a0, %pc_lo12(R__align.m) st.w $zero, $t4, 0 - move $a4, $t1 xvld $xr9, $sp, 432 # 32-byte Folded Reload - bgtz $t1, .LBB2_73 + bgtz $s3, .LBB2_73 .LBB2_68: # %._crit_edge612 ori $a0, $zero, 1 - bnez $a4, .LBB2_87 + bnez $s3, .LBB2_87 # %bb.69: movgr2fr.w $fa0, $zero ori $a1, $zero, 1 - st.d $a1, $sp, 416 # 8-byte Folded Spill - ld.d $a5, $sp, 248 # 8-byte Folded Reload + st.d $a1, $sp, 408 # 8-byte Folded Spill + ld.d $a4, $sp, 248 # 8-byte Folded Reload b .LBB2_88 .LBB2_70: # %.lr.ph607 ld.d $a2, $sp, 272 # 8-byte Folded Reload @@ -1854,7 +1817,7 @@ R__align: # @R__align ld.d $a3, $a3, %pc_lo12(R__align.digf2) ld.d $a4, $sp, 264 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(R__align.diaf2) - addi.d $a5, $s2, 1 + addi.d $a5, $t3, 1 bstrpick.d $a5, $a5, 31, 0 addi.d $a5, $a5, -1 movgr2fr.w $fa1, $zero @@ -1926,15 +1889,13 @@ R__align: # @R__align addi.d $a6, $a6, 4 bnez $a5, .LBB2_71 # %bb.72: # %.loopexit576.thread774 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $t4, $a0, %pc_lo12(R__align.m) st.w $zero, $t4, 0 - move $a4, $t1 .LBB2_73: # %iter.check855 - move $t2, $a4 ld.d $a0, $sp, 344 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.mp) - addi.d $a1, $s2, 1 + addi.d $a1, $t3, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a3, $zero, 4 @@ -1959,7 +1920,7 @@ R__align: # @R__align xvreplve0.w $xr0, $xr9 addi.d $a6, $a0, 36 xvrepli.b $xr1, 0 - xvreplgr2vr.w $xr2, $t3 + xvreplgr2vr.w $xr2, $t2 move $a7, $a3 .p2align 4, , 16 .LBB2_78: # %vector.body863 @@ -1993,7 +1954,7 @@ R__align: # @R__align slli.d $a7, $a3, 2 sub.d $a3, $a3, $a5 vrepli.b $vr1, 0 - vreplgr2vr.w $vr2, $t3 + vreplgr2vr.w $vr2, $t2 .p2align 4, , 16 .LBB2_82: # %vec.epilog.vector.body879 # =>This Inner Loop Header: Depth=1 @@ -2011,7 +1972,7 @@ R__align: # @R__align slli.d $a2, $a4, 2 addi.d $a3, $s5, -4 sub.d $a1, $a1, $a4 - movgr2fr.w $fa0, $t3 + movgr2fr.w $fa0, $t2 .p2align 4, , 16 .LBB2_85: # %vec.epilog.scalar.ph871 # =>This Inner Loop Header: Depth=1 @@ -2024,36 +1985,34 @@ R__align: # @R__align bnez $a1, .LBB2_85 .LBB2_86: move $a0, $zero - move $a4, $t2 .LBB2_87: # %._crit_edge612.thread - ld.d $a5, $sp, 248 # 8-byte Folded Reload + ld.d $a4, $sp, 248 # 8-byte Folded Reload ori $a1, $zero, 0 lu32i.d $a1, -1 - add.d $a1, $a5, $a1 + add.d $a1, $a4, $a1 srai.d $a1, $a1, 30 fldx.s $fa0, $s5, $a1 - st.d $a0, $sp, 416 # 8-byte Folded Spill + st.d $a0, $sp, 408 # 8-byte Folded Spill move $a0, $zero .LBB2_88: ld.d $a1, $sp, 96 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(R__align.lastverticalw) - st.d $a1, $sp, 408 # 8-byte Folded Spill + st.d $a1, $sp, 400 # 8-byte Folded Spill fst.s $fa0, $a1, 0 pcalau12i $a1, %got_pc_hi20(outgap) ld.d $a1, $a1, %got_pc_lo12(outgap) - st.d $a1, $sp, 40 # 8-byte Folded Spill + st.d $a1, $sp, 16 # 8-byte Folded Spill ld.w $a1, $a1, 0 sltu $a2, $zero, $a1 ld.d $a3, $sp, 72 # 8-byte Folded Reload add.w $a3, $a2, $a3 ori $a2, $zero, 2 - st.d $a3, $sp, 400 # 8-byte Folded Spill + st.d $a3, $sp, 392 # 8-byte Folded Spill blt $a3, $a2, .LBB2_129 # %bb.89: # %.lr.ph633 - st.d $s4, $sp, 368 # 8-byte Folded Spill + st.d $s2, $sp, 368 # 8-byte Folded Spill st.d $s1, $sp, 376 # 8-byte Folded Spill - st.d $s7, $sp, 384 # 8-byte Folded Spill - st.d $s8, $sp, 392 # 8-byte Folded Spill + st.d $s4, $sp, 384 # 8-byte Folded Spill sltui $a1, $s0, 1 or $a0, $a1, $a0 st.d $a0, $sp, 360 # 8-byte Folded Spill @@ -2069,21 +2028,21 @@ R__align: # @R__align ld.d $a0, $a0, %pc_lo12(R__align.ogcp1g) st.d $a0, $sp, 344 # 8-byte Folded Spill ld.d $a0, $sp, 336 # 8-byte Folded Reload - ld.d $a6, $a0, %pc_lo12(R__align.ogcp2g) + ld.d $a5, $a0, %pc_lo12(R__align.ogcp2g) ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp1g) st.d $a0, $sp, 336 # 8-byte Folded Spill ld.d $a0, $sp, 320 # 8-byte Folded Reload - ld.d $a7, $a0, %pc_lo12(R__align.fgcp2g) + ld.d $a6, $a0, %pc_lo12(R__align.fgcp2g) ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz1) st.d $a0, $sp, 272 # 8-byte Folded Spill ld.d $a0, $sp, 280 # 8-byte Folded Reload - ld.d $t0, $a0, %pc_lo12(R__align.digf2) + ld.d $a7, $a0, %pc_lo12(R__align.digf2) ld.d $a0, $sp, 264 # 8-byte Folded Reload - ld.d $t1, $a0, %pc_lo12(R__align.diaf2) + ld.d $t0, $a0, %pc_lo12(R__align.diaf2) ld.d $a0, $sp, 256 # 8-byte Folded Reload - ld.d $t2, $a0, %pc_lo12(R__align.gapz2) + ld.d $t1, $a0, %pc_lo12(R__align.gapz2) ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf1) st.d $a0, $sp, 240 # 8-byte Folded Spill @@ -2092,14 +2051,14 @@ R__align: # @R__align st.d $a0, $sp, 232 # 8-byte Folded Spill ori $a0, $zero, 0 lu32i.d $a0, -1 - add.d $a0, $a5, $a0 + add.d $a0, $a4, $a0 srai.d $a0, $a0, 30 st.d $a0, $sp, 352 # 8-byte Folded Spill - addi.d $a0, $s2, 1 + addi.d $a0, $t3, 1 bstrpick.d $a0, $a0, 31, 0 - bstrpick.d $a1, $s2, 30, 0 + bstrpick.d $a1, $t3, 30, 0 st.d $a1, $sp, 224 # 8-byte Folded Spill - addi.w $a1, $s2, -1 + addi.w $a1, $t3, -1 st.d $a1, $sp, 328 # 8-byte Folded Spill bstrpick.d $a1, $a1, 31, 0 slli.d $a2, $a1, 2 @@ -2109,20 +2068,20 @@ R__align: # @R__align andi $a1, $a2, 12 st.d $a1, $sp, 136 # 8-byte Folded Spill bstrpick.d $a1, $a2, 32, 4 - slli.d $a5, $a1, 4 + slli.d $a4, $a1, 4 slli.d $a1, $a1, 6 + st.d $a1, $sp, 40 # 8-byte Folded Spill + st.d $a4, $sp, 192 # 8-byte Folded Spill + sub.d $a1, $t3, $a4 st.d $a1, $sp, 32 # 8-byte Folded Spill - st.d $a5, $sp, 192 # 8-byte Folded Spill - sub.d $a1, $s2, $a5 - st.d $a1, $sp, 24 # 8-byte Folded Spill st.d $a2, $sp, 184 # 8-byte Folded Spill bstrpick.d $a1, $a2, 32, 2 slli.d $a2, $a1, 2 slli.d $a1, $a1, 4 st.d $a1, $sp, 160 # 8-byte Folded Spill - sub.d $a1, $s2, $a2 + sub.d $a1, $t3, $a2 st.d $a1, $sp, 152 # 8-byte Folded Spill - movgr2fr.w $fs2, $t3 + movgr2fr.w $fs2, $t2 st.d $a2, $sp, 168 # 8-byte Folded Spill sub.d $a1, $zero, $a2 st.d $a1, $sp, 144 # 8-byte Folded Spill @@ -2131,18 +2090,18 @@ R__align: # @R__align movgr2fr.w $fs0, $zero st.d $t4, $sp, 320 # 8-byte Folded Spill addi.d $fp, $t4, 8 - st.d $t1, $sp, 256 # 8-byte Folded Spill - addi.d $s0, $t1, 8 - st.d $t0, $sp, 264 # 8-byte Folded Spill - addi.d $s1, $t0, 8 - st.d $a7, $sp, 280 # 8-byte Folded Spill - addi.d $s2, $a7, 8 - st.d $a6, $sp, 288 # 8-byte Folded Spill - addi.d $s6, $a6, 8 - st.d $t2, $sp, 248 # 8-byte Folded Spill - addi.d $s4, $t2, 12 + st.d $t0, $sp, 256 # 8-byte Folded Spill + addi.d $s0, $t0, 8 + st.d $a7, $sp, 264 # 8-byte Folded Spill + addi.d $s1, $a7, 8 + st.d $a6, $sp, 280 # 8-byte Folded Spill + addi.d $s2, $a6, 8 + st.d $a5, $sp, 288 # 8-byte Folded Spill + addi.d $s6, $a5, 8 + st.d $t1, $sp, 248 # 8-byte Folded Spill + addi.d $s8, $t1, 12 st.d $a3, $sp, 296 # 8-byte Folded Spill - addi.d $s8, $a3, 8 + addi.d $s3, $a3, 8 ori $s7, $zero, 1 b .LBB2_91 .p2align 4, , 16 @@ -2150,13 +2109,12 @@ R__align: # @R__align # in Loop: Header=BB2_91 Depth=1 st.d $s5, $sp, 472 # 8-byte Folded Spill ld.d $a1, $sp, 352 # 8-byte Folded Reload - fldx.s $fa0, $s3, $a1 + fldx.s $fa0, $s4, $a1 addi.d $s7, $s7, 1 - ld.d $a1, $sp, 408 # 8-byte Folded Reload + ld.d $a1, $sp, 400 # 8-byte Folded Reload fstx.s $fa0, $a1, $a0 - move $s5, $s3 - ld.d $a4, $sp, 464 # 8-byte Folded Reload - ld.d $a0, $sp, 400 # 8-byte Folded Reload + move $s5, $s4 + ld.d $a0, $sp, 392 # 8-byte Folded Reload beq $s7, $a0, .LBB2_130 .LBB2_91: # =>This Loop Header: Depth=1 # Child Loop BB2_122 Depth 2 @@ -2166,12 +2124,13 @@ R__align: # @R__align addi.d $a0, $s7, -1 slli.d $a0, $a0, 2 fldx.s $fa0, $t7, $a0 - ld.d $s3, $sp, 472 # 8-byte Folded Reload + ld.d $s4, $sp, 472 # 8-byte Folded Reload fst.s $fa0, $s5, 0 - move $a0, $s3 - ld.d $a1, $sp, 384 # 8-byte Folded Reload - ld.d $a2, $sp, 392 # 8-byte Folded Reload + move $a0, $s4 + ld.d $a1, $sp, 416 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload move $a3, $s7 + ld.d $a4, $sp, 464 # 8-byte Folded Reload ld.d $a5, $sp, 376 # 8-byte Folded Reload ld.d $a6, $sp, 368 # 8-byte Folded Reload move $a7, $zero @@ -2192,15 +2151,15 @@ R__align: # @R__align # in Loop: Header=BB2_91 Depth=1 ld.d $a1, $sp, 208 # 8-byte Folded Reload add.d $a0, $a2, $a1 - bgeu $s3, $a0, .LBB2_119 + bgeu $s4, $a0, .LBB2_119 # %bb.94: # %vector.memcheck888 # in Loop: Header=BB2_91 Depth=1 - add.d $a0, $s3, $a1 + add.d $a0, $s4, $a1 bgeu $a2, $a0, .LBB2_119 .LBB2_95: # in Loop: Header=BB2_91 Depth=1 move $a0, $a2 ld.d $a2, $sp, 216 # 8-byte Folded Reload - move $a1, $s3 + move $a1, $s4 .p2align 4, , 16 .LBB2_96: # %.lr.ph.i509 # Parent Loop BB2_91 Depth=1 @@ -2219,8 +2178,8 @@ R__align: # @R__align slli.d $a0, $s7, 2 ld.d $t7, $sp, 424 # 8-byte Folded Reload fldx.s $fa0, $t7, $a0 - fst.s $fa0, $s3, 0 - ld.d $a1, $sp, 416 # 8-byte Folded Reload + fst.s $fa0, $s4, 0 + ld.d $a1, $sp, 408 # 8-byte Folded Reload vldi $vr18, -912 vldi $vr19, -928 ori $t8, $zero, 1 @@ -2360,9 +2319,9 @@ R__align: # @R__align ld.d $a4, $sp, 296 # 8-byte Folded Reload st.w $ra, $a4, 4 .LBB2_106: # in Loop: Header=BB2_91 Depth=1 - fld.s $fa4, $s3, 4 + fld.s $fa4, $s4, 4 fadd.s $fa4, $fs0, $fa4 - fst.s $fa4, $s3, 4 + fst.s $fa4, $s4, 4 ld.d $a4, $sp, 224 # 8-byte Folded Reload beq $a4, $t8, .LBB2_90 # %bb.107: # %.peel.next @@ -2385,7 +2344,7 @@ R__align: # @R__align fcmp.cle.s $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 addi.d $a7, $s5, 4 - addi.d $t0, $s3, 8 + addi.d $t0, $s4, 8 addi.d $t1, $t1, 8 ori $t2, $zero, 1 addi.d $t3, $zero, -2 @@ -2394,7 +2353,7 @@ R__align: # @R__align .p2align 4, , 16 .LBB2_108: # in Loop: Header=BB2_110 Depth=2 fstx.s $fa2, $fp, $a4 - stx.w $ra, $s8, $a4 + stx.w $ra, $s3, $a4 .LBB2_109: # in Loop: Header=BB2_110 Depth=2 fldx.s $fa1, $t0, $a4 fadd.s $fa1, $fs0, $fa1 @@ -2485,7 +2444,7 @@ R__align: # @R__align fcvt.s.d $ft1, $fa0 move $t6, $t5 .LBB2_114: # in Loop: Header=BB2_110 Depth=2 - fldx.s $fa0, $s4, $a4 + fldx.s $fa0, $s8, $a4 fcvt.d.s $fa0, $fa0 fld.s $fa7, $a2, 0 fadd.d $ft0, $ft0, $fa6 @@ -2507,12 +2466,12 @@ R__align: # @R__align move $t5, $t6 bcnez $fcc0, .LBB2_116 # %bb.115: # in Loop: Header=BB2_110 Depth=2 - ldx.w $t6, $s8, $a4 + ldx.w $t6, $s3, $a4 sub.d $t6, $s7, $t6 stx.w $t6, $t1, $a4 fmov.s $fs0, $ft0 .LBB2_116: # in Loop: Header=BB2_110 Depth=2 - add.d $t6, $s4, $a4 + add.d $t6, $s8, $a4 fld.s $ft0, $t6, -4 fcvt.d.s $ft0, $ft0 fsub.d $ft1, $ft10, $ft0 @@ -2547,7 +2506,7 @@ R__align: # @R__align b .LBB2_125 .LBB2_121: # %vector.body903.preheader # in Loop: Header=BB2_91 Depth=1 - addi.d $a0, $s3, 32 + addi.d $a0, $s4, 32 addi.d $a1, $a2, 32 ld.d $a3, $sp, 192 # 8-byte Folded Reload .p2align 4, , 16 @@ -2580,8 +2539,8 @@ R__align: # @R__align # in Loop: Header=BB2_91 Depth=1 ld.d $a1, $sp, 160 # 8-byte Folded Reload add.d $a0, $a2, $a1 - add.d $a1, $s3, $a1 - alsl.d $a3, $a4, $s3, 2 + add.d $a1, $s4, $a1 + alsl.d $a3, $a4, $s4, 2 alsl.d $a2, $a4, $a2, 2 ld.d $a5, $sp, 144 # 8-byte Folded Reload add.d $a4, $a5, $a4 @@ -2605,26 +2564,26 @@ R__align: # @R__align bne $a3, $a4, .LBB2_96 b .LBB2_97 .LBB2_128: # in Loop: Header=BB2_91 Depth=1 - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload add.d $a0, $a2, $a1 - add.d $a1, $s3, $a1 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + add.d $a1, $s4, $a1 + ld.d $a2, $sp, 32 # 8-byte Folded Reload b .LBB2_96 .LBB2_129: movgr2fr.w $fs0, $zero - ld.d $s1, $sp, 128 # 8-byte Folded Reload + ld.d $s1, $sp, 16 # 8-byte Folded Reload beqz $a1, .LBB2_131 b .LBB2_145 .LBB2_130: # %._crit_edge634.loopexit - ld.d $a0, $sp, 40 # 8-byte Folded Reload - ld.w $a1, $a0, 0 - move $s5, $s3 - ld.d $s1, $sp, 128 # 8-byte Folded Reload + ld.d $s1, $sp, 16 # 8-byte Folded Reload + ld.w $a1, $s1, 0 + move $s5, $s4 + ld.d $s6, $sp, 120 # 8-byte Folded Reload ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $s2, $sp, 216 # 8-byte Folded Reload + ld.d $t3, $sp, 216 # 8-byte Folded Reload bnez $a1, .LBB2_145 .LBB2_131: # %.preheader574 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload bnez $a0, .LBB2_138 # %bb.132: # %.lr.ph639 pcalau12i $a0, %got_pc_hi20(offset) @@ -2643,19 +2602,29 @@ R__align: # @R__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI2_2) - xvld $xr1, $a5, %pc_lo12(.LCPI2_2) pcalau12i $a5, %pc_hi20(.LCPI2_3) - xvld $xr2, $a5, %pc_lo12(.LCPI2_3) - xvreplgr2vr.d $xr3, $s2 + xvld $xr1, $a5, %pc_lo12(.LCPI2_3) + pcalau12i $a5, %pc_hi20(.LCPI2_4) + xvld $xr2, $a5, %pc_lo12(.LCPI2_4) + pcalau12i $a5, %pc_hi20(.LCPI2_2) + xvld $xr3, $a5, %pc_lo12(.LCPI2_2) + xvreplgr2vr.d $xr4, $t3 addi.d $a5, $s5, 4 - xvldi $xr4, -800 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB2_134: # %vector.body949 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr3, $xr1 - xvsub.d $xr6, $xr3, $xr2 + xvsub.d $xr6, $xr4, $xr1 + xvsub.d $xr7, $xr4, $xr2 + xvpickve2gr.d $a7, $xr7, 0 + vinsgr2vr.w $vr8, $a7, 0 + xvpickve2gr.d $a7, $xr7, 1 + vinsgr2vr.w $vr8, $a7, 1 + xvpickve2gr.d $a7, $xr7, 2 + vinsgr2vr.w $vr8, $a7, 2 + xvpickve2gr.d $a7, $xr7, 3 + vinsgr2vr.w $vr8, $a7, 3 xvpickve2gr.d $a7, $xr6, 0 vinsgr2vr.w $vr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -2664,73 +2633,44 @@ R__align: # @R__align vinsgr2vr.w $vr7, $a7, 2 xvpickve2gr.d $a7, $xr6, 3 vinsgr2vr.w $vr7, $a7, 3 - xvpickve2gr.d $a7, $xr5, 0 - vinsgr2vr.w $vr6, $a7, 0 - xvpickve2gr.d $a7, $xr5, 1 - vinsgr2vr.w $vr6, $a7, 1 - xvpickve2gr.d $a7, $xr5, 2 - vinsgr2vr.w $vr6, $a7, 2 - xvpickve2gr.d $a7, $xr5, 3 - vinsgr2vr.w $vr6, $a7, 3 - xvpermi.q $xr7, $xr6, 2 - xvmul.w $xr5, $xr0, $xr7 - xvpermi.q $xr6, $xr5, 1 + xvpermi.q $xr8, $xr7, 2 + xvmul.w $xr6, $xr0, $xr8 + xvpermi.q $xr7, $xr6, 1 + vext2xv.d.w $xr7, $xr7 + xvffint.d.l $xr7, $xr7 vext2xv.d.w $xr6, $xr6 + xvld $xr8, $a5, 0 xvffint.d.l $xr6, $xr6 - vext2xv.d.w $xr5, $xr5 - xvld $xr7, $a5, 0 - xvffint.d.l $xr5, $xr5 - xvfmul.d $xr5, $xr5, $xr4 - xvfmul.d $xr6, $xr6, $xr4 - xvpermi.q $xr8, $xr7, 1 - vreplvei.w $vr9, $vr8, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr8, 2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvpermi.q $xr9, $xr8, 1 + vreplvei.w $vr10, $vr9, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr9, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr9, 1 fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr8, 1 + vreplvei.w $vr9, $vr9, 0 fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr10, 16 + xvpermi.q $xr9, $xr11, 2 + vreplvei.w $vr10, $vr8, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr8, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr8, 1 + fcvt.d.s $ft2, $ft2 vreplvei.w $vr8, $vr8, 0 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr9, 16 - xvpermi.q $xr8, $xr10, 2 - vreplvei.w $vr9, $vr7, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr7, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr7, 1 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr9, 16 - xvpermi.q $xr7, $xr10, 2 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr8, $xr11, 2 + xvfadd.d $xr7, $xr9, $xr7 xvfadd.d $xr6, $xr8, $xr6 - xvfadd.d $xr5, $xr7, $xr5 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $a5, 0 + xvfcvt.s.d $xr6, $xr7, $xr6 + xvperm.w $xr6, $xr6, $xr3 + xvst $xr6, $a5, 0 xvaddi.du $xr2, $xr2, 8 xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 @@ -2739,7 +2679,7 @@ R__align: # @R__align # %bb.135: # %middle.block956 beq $a2, $a4, .LBB2_138 .LBB2_136: # %scalar.ph940.preheader - sub.w $a2, $s2, $a3 + sub.w $a2, $t3, $a3 mul.d $a2, $a0, $a2 alsl.d $a4, $a3, $s5, 2 sub.d $a1, $a1, $a3 @@ -2788,8 +2728,8 @@ R__align: # @R__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - pcalau12i $a5, %pc_hi20(.LCPI2_4) - vld $vr3, $a5, %pc_lo12(.LCPI2_4) + pcalau12i $a5, %pc_hi20(.LCPI2_5) + vld $vr3, $a5, %pc_lo12(.LCPI2_5) xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 xvldi $xr5, -800 @@ -2814,18 +2754,9 @@ R__align: # @R__align vextrins.d $vr7, $vr8, 16 xvpermi.q $xr7, $xr9, 2 xvfmadd.d $xr6, $xr4, $xr6, $xr7 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - vst $vr8, $a5, 0 + xvpermi.q $xr7, $xr6, 1 + vfcvt.s.d $vr6, $vr7, $vr6 + vst $vr6, $a5, 0 vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 @@ -2857,17 +2788,17 @@ R__align: # @R__align ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(R__align.lastverticalw) ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $a0, %pc_lo12(R__align.mseq1) + ld.d $s4, $a0, %pc_lo12(R__align.mseq1) ld.d $a0, $sp, 80 # 8-byte Folded Reload - ld.d $s4, $a0, %pc_lo12(R__align.mseq2) + ld.d $s3, $a0, %pc_lo12(R__align.mseq2) ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ijp) st.d $a0, $sp, 464 # 8-byte Folded Spill - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - ld.d $a1, $s1, 0 + ld.d $a1, $s6, 0 move $s7, $a0 move $a0, $a1 pcaddu18i $ra, %call36(strlen) @@ -2882,8 +2813,7 @@ R__align: # @R__align move $a0, $s8 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 40 # 8-byte Folded Reload - ld.w $a2, $a1, 0 + ld.w $a2, $s1, 0 addi.w $a1, $s7, 0 st.d $a1, $sp, 472 # 8-byte Folded Spill addi.w $a1, $s6, 0 @@ -3027,8 +2957,8 @@ R__align: # @R__align move $a3, $zero b .LBB2_185 .LBB2_174: # %vector.ph1074 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - vld $vr0, $a4, %pc_lo12(.LCPI2_5) + pcalau12i $a4, %pc_hi20(.LCPI2_6) + vld $vr0, $a4, %pc_lo12(.LCPI2_6) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s1, 16 @@ -3080,8 +3010,8 @@ R__align: # @R__align move $a4, $zero b .LBB2_201 .LBB2_182: # %vector.ph980 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - vld $vr0, $a4, %pc_lo12(.LCPI2_5) + pcalau12i $a4, %pc_hi20(.LCPI2_6) + vld $vr0, $a4, %pc_lo12(.LCPI2_6) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $ra, 16 @@ -3149,8 +3079,8 @@ R__align: # @R__align andi $a6, $a5, 12 bstrpick.d $a4, $a5, 31, 4 slli.d $a4, $a4, 4 - pcalau12i $a7, %pc_hi20(.LCPI2_6) - xvld $xr0, $a7, %pc_lo12(.LCPI2_6) + pcalau12i $a7, %pc_hi20(.LCPI2_7) + xvld $xr0, $a7, %pc_lo12(.LCPI2_7) addi.d $a7, $a2, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -3172,8 +3102,8 @@ R__align: # @R__align beqz $a6, .LBB2_201 .LBB2_198: # %vec.epilog.ph1109 move $a6, $a4 - pcalau12i $a4, %pc_hi20(.LCPI2_7) - vld $vr0, $a4, %pc_lo12(.LCPI2_7) + pcalau12i $a4, %pc_hi20(.LCPI2_8) + vld $vr0, $a4, %pc_lo12(.LCPI2_8) bstrpick.d $a4, $a5, 31, 2 slli.d $a4, $a4, 2 vreplgr2vr.w $vr1, $a6 @@ -3405,32 +3335,32 @@ R__align: # @R__align move $s7, $a6 bge $s2, $a5, .LBB2_205 .LBB2_233: # %._crit_edge42.i547 - ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s6, $sp, 112 # 8-byte Folded Reload bgtz $s6, .LBB2_235 b .LBB2_237 .LBB2_234: move $s8, $t3 move $s5, $t2 - ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s6, $sp, 112 # 8-byte Folded Reload blez $s6, .LBB2_237 .LBB2_235: # %.lr.ph50.preheader.i556 - ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $fp, $sp, 128 # 8-byte Folded Reload move $s0, $s6 .p2align 4, , 16 .LBB2_236: # %.lr.ph50.i558 # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s3, 0 + ld.d $a0, $s4, 0 ld.d $a1, $fp, 0 move $a2, $s8 pcaddu18i $ra, %call36(gapireru) jirl $ra, $ra, 0 addi.d $s0, $s0, -1 addi.d $fp, $fp, 8 - addi.d $s3, $s3, 8 + addi.d $s4, $s4, 8 bnez $s0, .LBB2_236 .LBB2_237: # %.preheader.i549 ld.d $s1, $sp, 64 # 8-byte Folded Reload - ld.d $s2, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 120 # 8-byte Folded Reload blez $s1, .LBB2_289 # %bb.238: # %.lr.ph52.preheader.i550 move $fp, $s2 @@ -3438,22 +3368,22 @@ R__align: # @R__align .p2align 4, , 16 .LBB2_239: # %.lr.ph52.i552 # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s4, 0 + ld.d $a0, $s3, 0 ld.d $a1, $fp, 0 move $a2, $s5 pcaddu18i $ra, %call36(gapireru) jirl $ra, $ra, 0 addi.d $s0, $s0, -1 addi.d $fp, $fp, 8 - addi.d $s4, $s4, 8 + addi.d $s3, $s3, 8 bnez $s0, .LBB2_239 b .LBB2_289 .LBB2_240: # %vector.ph999 andi $a7, $a6, 12 bstrpick.d $a5, $a6, 31, 4 slli.d $a5, $a5, 4 - pcalau12i $t0, %pc_hi20(.LCPI2_6) - xvld $xr0, $t0, %pc_lo12(.LCPI2_6) + pcalau12i $t0, %pc_hi20(.LCPI2_7) + xvld $xr0, $t0, %pc_lo12(.LCPI2_7) addi.d $t0, $a3, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -3475,8 +3405,8 @@ R__align: # @R__align beqz $a7, .LBB2_247 .LBB2_244: # %vec.epilog.ph1014 move $a7, $a5 - pcalau12i $a5, %pc_hi20(.LCPI2_7) - vld $vr0, $a5, %pc_lo12(.LCPI2_7) + pcalau12i $a5, %pc_hi20(.LCPI2_8) + vld $vr0, $a5, %pc_lo12(.LCPI2_8) bstrpick.d $a5, $a6, 31, 2 slli.d $a5, $a5, 2 vreplgr2vr.w $vr1, $a7 @@ -3729,32 +3659,32 @@ R__align: # @R__align move $s7, $t1 bge $s2, $t0, .LBB2_251 .LBB2_282: # %._crit_edge42.i - ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s6, $sp, 112 # 8-byte Folded Reload bgtz $s6, .LBB2_284 b .LBB2_286 .LBB2_283: move $s8, $t8 move $s5, $t7 - ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s6, $sp, 112 # 8-byte Folded Reload blez $s6, .LBB2_286 .LBB2_284: # %.lr.ph50.preheader.i - ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $fp, $sp, 128 # 8-byte Folded Reload move $s0, $s6 .p2align 4, , 16 .LBB2_285: # %.lr.ph50.i # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s3, 0 + ld.d $a0, $s4, 0 ld.d $a1, $fp, 0 move $a2, $s8 pcaddu18i $ra, %call36(gapireru) jirl $ra, $ra, 0 addi.d $s0, $s0, -1 addi.d $fp, $fp, 8 - addi.d $s3, $s3, 8 + addi.d $s4, $s4, 8 bnez $s0, .LBB2_285 .LBB2_286: # %.preheader.i ld.d $s1, $sp, 64 # 8-byte Folded Reload - ld.d $s2, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 120 # 8-byte Folded Reload blez $s1, .LBB2_289 # %bb.287: # %.lr.ph52.preheader.i move $fp, $s2 @@ -3762,14 +3692,14 @@ R__align: # @R__align .p2align 4, , 16 .LBB2_288: # %.lr.ph52.i # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s4, 0 + ld.d $a0, $s3, 0 ld.d $a1, $fp, 0 move $a2, $s5 pcaddu18i $ra, %call36(gapireru) jirl $ra, $ra, 0 addi.d $s0, $s0, -1 addi.d $fp, $fp, 8 - addi.d $s4, $s4, 8 + addi.d $s3, $s3, 8 bnez $s0, .LBB2_288 .LBB2_289: # %Atracking_localhom.exit ld.d $a0, $sp, 432 # 8-byte Folded Reload @@ -3785,8 +3715,7 @@ R__align: # @R__align jirl $ra, $ra, 0 addi.w $a3, $a0, 0 lu12i.w $a4, 1220 - ld.d $s0, $sp, 112 # 8-byte Folded Reload - ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $a2, $sp, 24 # 8-byte Folded Reload blt $a2, $a3, .LBB2_291 # %bb.290: # %Atracking_localhom.exit ori $a0, $a4, 2881 @@ -3805,6 +3734,7 @@ R__align: # @R__align pcaddu18i $ra, %call36(ErrorExit) jirl $ra, $ra, 0 .LBB2_292: + ld.d $s0, $sp, 128 # 8-byte Folded Reload blez $s6, .LBB2_295 # %bb.293: # %.lr.ph645 ld.d $a0, $sp, 104 # 8-byte Folded Reload @@ -3866,7 +3796,7 @@ R__align: # @R__align b .LBB2_84 .LBB2_300: alsl.d $a2, $a5, $a1, 2 - sub.d $a3, $s2, $a5 + sub.d $a3, $t3, $a5 alsl.d $a4, $a5, $s5, 2 b .LBB2_46 .Lfunc_end2: diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s index 19e657f..a3f178b 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s @@ -3,19 +3,18 @@ .p2align 3, 0x0 # -- Begin function Aalign .LCPI0_0: .dword 0x3ff199999999999a # double 1.1000000000000001 - .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 + .section .rodata.cst32,"aM",@progbits,32 + .p2align 5, 0x0 .LCPI0_1: - .dword 0 # 0x0 - .dword 1 # 0x1 -.LCPI0_3: .word 0 # 0x0 .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst32,"aM",@progbits,32 - .p2align 5, 0x0 -.LCPI0_2: + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI0_3: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -24,6 +23,16 @@ .word 5 # 0x5 .word 6 # 0x6 .word 7 # 0x7 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4, 0x0 +.LCPI0_2: + .dword 0 # 0x0 + .dword 1 # 0x1 +.LCPI0_4: + .word 0 # 0x0 + .word 1 # 0x1 + .word 2 # 0x2 + .word 3 # 0x3 .text .globl Aalign .p2align 5 @@ -43,8 +52,8 @@ Aalign: # @Aalign st.d $s7, $sp, 192 # 8-byte Folded Spill st.d $s8, $sp, 184 # 8-byte Folded Spill fst.d $fs0, $sp, 176 # 8-byte Folded Spill - pcalau12i $s8, %pc_hi20(Aalign.orlgth1) - ld.w $s5, $s8, %pc_lo12(Aalign.orlgth1) + pcalau12i $s6, %pc_hi20(Aalign.orlgth1) + ld.w $s5, $s6, %pc_lo12(Aalign.orlgth1) st.d $a6, $sp, 24 # 8-byte Folded Spill move $fp, $a5 move $s1, $a4 @@ -71,7 +80,7 @@ Aalign: # @Aalign move $a0, $a2 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.w $s5, $s8, %pc_lo12(Aalign.orlgth1) + ld.w $s5, $s6, %pc_lo12(Aalign.orlgth1) ld.d $a1, $sp, 40 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Aalign.mseq2) .LBB0_2: @@ -98,7 +107,7 @@ Aalign: # @Aalign st.d $a1, $sp, 64 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(Aalign.mp) st.d $a1, $sp, 56 # 8-byte Folded Spill - pcalau12i $s6, %pc_hi20(Aalign.mseq) + pcalau12i $s8, %pc_hi20(Aalign.mseq) pcalau12i $a1, %pc_hi20(Aalign.cpmx1) st.d $a1, $sp, 144 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(Aalign.cpmx2) @@ -116,7 +125,7 @@ Aalign: # @Aalign # %bb.4: blez $s1, .LBB0_12 .LBB0_5: # %.lr.ph - ld.d $a0, $s6, %pc_lo12(Aalign.mseq) + ld.d $a0, $s8, %pc_lo12(Aalign.mseq) ld.d $a1, $sp, 32 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Aalign.mseq1) ori $a3, $zero, 8 @@ -161,7 +170,7 @@ Aalign: # @Aalign .LBB0_12: # %.preheader177 blez $fp, .LBB0_18 # %bb.13: # %.lr.ph181 - ld.d $a0, $s6, %pc_lo12(Aalign.mseq) + ld.d $a0, $s8, %pc_lo12(Aalign.mseq) ld.d $a1, $sp, 40 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Aalign.mseq2) ori $a2, $zero, 8 @@ -290,64 +299,45 @@ Aalign: # @Aalign ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplve0.d $xr1, $xr0 + pcalau12i $a5, %pc_hi20(.LCPI0_1) + xvld $xr1, $a5, %pc_lo12(.LCPI0_1) + xvreplve0.d $xr2, $xr0 ld.d $a5, $sp, 80 # 8-byte Folded Reload addi.d $a5, $a5, 4 - xvldi $xr2, -928 + xvldi $xr3, -928 move $a6, $a4 .p2align 4, , 16 .LBB0_24: # %vector.body318 # =>This Inner Loop Header: Depth=1 - xvld $xr3, $a5, 0 - xvpermi.q $xr4, $xr3, 1 - vreplvei.w $vr5, $vr4, 3 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr6, $vr4, 2 + xvld $xr4, $a5, 0 + xvpermi.q $xr5, $xr4, 1 + vreplvei.w $vr6, $vr5, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr5, 2 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr5, 16 - vreplvei.w $vr5, $vr4, 1 + vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr6, 16 + xvpermi.q $xr5, $xr7, 2 + vreplvei.w $vr6, $vr4, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr4, 2 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr6, $vr4, 1 + fcvt.d.s $fa6, $fa6 vreplvei.w $vr4, $vr4, 0 fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr5, 16 - xvpermi.q $xr4, $xr6, 2 - vreplvei.w $vr5, $vr3, 3 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr6, $vr3, 2 - fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr5, 16 - vreplvei.w $vr5, $vr3, 1 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr3, $vr3, 0 - fcvt.d.s $fa3, $fa3 - vextrins.d $vr3, $vr5, 16 - xvpermi.q $xr3, $xr6, 2 - xvfmadd.d $xr4, $xr1, $xr2, $xr4 - xvfmadd.d $xr3, $xr1, $xr2, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + vextrins.d $vr4, $vr6, 16 + xvpermi.q $xr4, $xr7, 2 + xvfmadd.d $xr5, $xr2, $xr3, $xr5 + xvfmadd.d $xr4, $xr2, $xr3, $xr4 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB0_24 @@ -388,63 +378,44 @@ Aalign: # @Aalign ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 - xvreplve0.d $xr1, $xr0 + pcalau12i $a5, %pc_hi20(.LCPI0_1) + xvld $xr1, $a5, %pc_lo12(.LCPI0_1) + xvreplve0.d $xr2, $xr0 addi.d $a5, $s7, 4 - xvldi $xr2, -928 + xvldi $xr3, -928 move $a6, $a4 .p2align 4, , 16 .LBB0_31: # %vector.body332 # =>This Inner Loop Header: Depth=1 - xvld $xr3, $a5, 0 - xvpermi.q $xr4, $xr3, 1 - vreplvei.w $vr5, $vr4, 3 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr6, $vr4, 2 + xvld $xr4, $a5, 0 + xvpermi.q $xr5, $xr4, 1 + vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr5, 16 - vreplvei.w $vr5, $vr4, 1 + vreplvei.w $vr7, $vr5, 2 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr6, $vr5, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr6, 16 + xvpermi.q $xr5, $xr7, 2 + vreplvei.w $vr6, $vr4, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr4, 2 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr6, $vr4, 1 + fcvt.d.s $fa6, $fa6 vreplvei.w $vr4, $vr4, 0 fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr5, 16 - xvpermi.q $xr4, $xr6, 2 - vreplvei.w $vr5, $vr3, 3 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr6, $vr3, 2 - fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr5, 16 - vreplvei.w $vr5, $vr3, 1 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr3, $vr3, 0 - fcvt.d.s $fa3, $fa3 - vextrins.d $vr3, $vr5, 16 - xvpermi.q $xr3, $xr6, 2 - xvfmadd.d $xr4, $xr1, $xr2, $xr4 - xvfmadd.d $xr3, $xr1, $xr2, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + vextrins.d $vr4, $vr6, 16 + xvpermi.q $xr4, $xr7, 2 + xvfmadd.d $xr5, $xr2, $xr3, $xr5 + xvfmadd.d $xr4, $xr2, $xr3, $xr4 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 bnez $a6, .LBB0_31 @@ -531,67 +502,48 @@ Aalign: # @Aalign vldrepl.w $vr0, $a5, 0 bstrpick.d $a3, $a0, 31, 3 slli.d $a3, $a3, 3 - vext2xv.d.w $xr0, $xr0 - xvffint.d.l $xr0, $xr0 - xvldi $xr1, -928 - xvrepli.b $xr2, 0 + vext2xv.d.w $xr1, $xr0 + pcalau12i $a5, %pc_hi20(.LCPI0_1) + xvld $xr0, $a5, %pc_lo12(.LCPI0_1) + xvffint.d.l $xr1, $xr1 + xvldi $xr2, -928 + xvrepli.b $xr3, 0 move $a5, $a1 move $a6, $a2 move $a7, $a3 .p2align 4, , 16 .LBB0_44: # %vector.body352 # =>This Inner Loop Header: Depth=1 - xvld $xr3, $a4, 0 - xvpermi.q $xr4, $xr3, 1 - vreplvei.w $vr5, $vr4, 3 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr6, $vr4, 2 + xvld $xr4, $a4, 0 + xvpermi.q $xr5, $xr4, 1 + vreplvei.w $vr6, $vr5, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr5, 2 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr5, 16 - vreplvei.w $vr5, $vr4, 1 + vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr6, 16 + xvpermi.q $xr5, $xr7, 2 + vreplvei.w $vr6, $vr4, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr4, 2 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr6, $vr4, 1 + fcvt.d.s $fa6, $fa6 vreplvei.w $vr4, $vr4, 0 fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr5, 16 - xvpermi.q $xr4, $xr6, 2 - vreplvei.w $vr5, $vr3, 3 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr6, $vr3, 2 - fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr5, 16 - vreplvei.w $vr5, $vr3, 1 - fcvt.d.s $fa5, $fa5 - vreplvei.w $vr3, $vr3, 0 - fcvt.d.s $fa3, $fa3 - vextrins.d $vr3, $vr5, 16 - xvpermi.q $xr3, $xr6, 2 - xvfmadd.d $xr4, $xr0, $xr1, $xr4 - xvfmadd.d $xr3, $xr0, $xr1, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 - xvst $xr2, $a6, 0 + vextrins.d $vr4, $vr6, 16 + xvpermi.q $xr4, $xr7, 2 + xvfmadd.d $xr5, $xr1, $xr2, $xr5 + xvfmadd.d $xr4, $xr1, $xr2, $xr4 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr0 + xvst $xr4, $a5, 0 + xvst $xr3, $a6, 0 addi.d $a7, $a7, -8 addi.d $a6, $a6, 32 addi.d $a5, $a5, 32 @@ -872,8 +824,8 @@ Aalign: # @Aalign move $a3, $zero b .LBB0_78 .LBB0_75: # %vector.ph363 - pcalau12i $a4, %pc_hi20(.LCPI0_1) - vld $vr0, $a4, %pc_lo12(.LCPI0_1) + pcalau12i $a4, %pc_hi20(.LCPI0_2) + vld $vr0, $a4, %pc_lo12(.LCPI0_2) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s6, 16 @@ -956,8 +908,8 @@ Aalign: # @Aalign andi $a5, $a4, 12 bstrpick.d $a3, $a4, 31, 4 slli.d $a3, $a3, 4 - pcalau12i $a6, %pc_hi20(.LCPI0_2) - xvld $xr0, $a6, %pc_lo12(.LCPI0_2) + pcalau12i $a6, %pc_hi20(.LCPI0_3) + xvld $xr0, $a6, %pc_lo12(.LCPI0_3) addi.d $a6, $a1, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -979,8 +931,8 @@ Aalign: # @Aalign beqz $a5, .LBB0_95 .LBB0_92: # %vec.epilog.ph move $a5, $a3 - pcalau12i $a3, %pc_hi20(.LCPI0_3) - vld $vr0, $a3, %pc_lo12(.LCPI0_3) + pcalau12i $a3, %pc_hi20(.LCPI0_4) + vld $vr0, $a3, %pc_lo12(.LCPI0_4) bstrpick.d $a3, $a4, 31, 2 slli.d $a3, $a3, 2 vreplgr2vr.w $vr1, $a5 @@ -1343,7 +1295,7 @@ Aalign: # @Aalign ld.d $a0, $a0, %pc_lo12(Aalign.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $s6, %pc_lo12(Aalign.mseq) + ld.d $a0, $s8, %pc_lo12(Aalign.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 ld.d $a0, $sp, 144 # 8-byte Folded Reload @@ -1362,7 +1314,7 @@ Aalign: # @Aalign ld.d $a0, $a0, %pc_lo12(Aalign.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.w $s5, $s8, %pc_lo12(Aalign.orlgth1) + ld.w $s5, $s6, %pc_lo12(Aalign.orlgth1) ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.w $s7, $a0, %pc_lo12(Aalign.orlgth2) .LBB0_145: @@ -1443,7 +1395,7 @@ Aalign: # @Aalign add.w $a1, $s7, $s5 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - st.d $a0, $s6, %pc_lo12(Aalign.mseq) + st.d $a0, $s8, %pc_lo12(Aalign.mseq) ori $a0, $zero, 26 move $a1, $s4 pcaddu18i $ra, %call36(AllocateFloatMtx) @@ -1481,7 +1433,7 @@ Aalign: # @Aalign ori $a2, $zero, 1 pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 - st.w $s5, $s8, %pc_lo12(Aalign.orlgth1) + st.w $s5, $s6, %pc_lo12(Aalign.orlgth1) ld.d $a6, $sp, 72 # 8-byte Folded Reload st.w $s7, $a6, %pc_lo12(Aalign.orlgth2) bgtz $s1, .LBB0_5 @@ -1494,12 +1446,12 @@ Aalign: # @Aalign pcalau12i $a0, %got_pc_hi20(commonIP) ld.d $a0, $a0, %got_pc_lo12(commonIP) ld.d $a0, $a0, 0 - move $s6, $a6 + move $s7, $a6 pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.w $s5, $s8, %pc_lo12(Aalign.orlgth1) + ld.w $s5, $s6, %pc_lo12(Aalign.orlgth1) ld.w $a0, $s3, 0 - ld.w $s7, $s6, %pc_lo12(Aalign.orlgth2) + ld.w $s7, $s7, %pc_lo12(Aalign.orlgth2) ld.w $a1, $s4, 0 .LBB0_149: slt $a2, $a0, $s5 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s index 5c8be14..ed1263d 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s @@ -458,6 +458,15 @@ imp_match_init_strict: # @imp_match_init_strict .section .rodata.cst32,"aM",@progbits,32 .p2align 5, 0x0 .LCPI3_1: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI3_2: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 @@ -466,17 +475,17 @@ imp_match_init_strict: # @imp_match_init_strict .word 6 # 0x6 .word 7 # 0x7 .word 8 # 0x8 -.LCPI3_2: +.LCPI3_3: .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 .dword 8 # 0x8 -.LCPI3_3: +.LCPI3_4: .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI3_6: +.LCPI3_7: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -487,15 +496,15 @@ imp_match_init_strict: # @imp_match_init_strict .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI3_4: +.LCPI3_5: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI3_5: +.LCPI3_6: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI3_7: +.LCPI3_8: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -1232,73 +1241,54 @@ A__align: # @A__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 + pcalau12i $a5, %pc_hi20(.LCPI3_2) + xvld $xr0, $a5, %pc_lo12(.LCPI3_2) pcalau12i $a5, %pc_hi20(.LCPI3_1) - xvld $xr0, $a5, %pc_lo12(.LCPI3_1) - xvreplgr2vr.w $xr1, $a0 + xvld $xr1, $a5, %pc_lo12(.LCPI3_1) + xvreplgr2vr.w $xr2, $a0 addi.d $a5, $s5, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_52: # %vector.body605 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -1416,74 +1406,55 @@ A__align: # @A__align ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 + pcalau12i $a5, %pc_hi20(.LCPI3_2) + xvld $xr0, $a5, %pc_lo12(.LCPI3_2) pcalau12i $a5, %pc_hi20(.LCPI3_1) - xvld $xr0, $a5, %pc_lo12(.LCPI3_1) - xvreplgr2vr.w $xr1, $a0 + xvld $xr1, $a5, %pc_lo12(.LCPI3_1) + xvreplgr2vr.w $xr2, $a0 ld.d $a5, $sp, 344 # 8-byte Folded Reload addi.d $a5, $a5, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_68: # %vector.body620 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -1653,10 +1624,10 @@ A__align: # @A__align # Child Loop BB3_104 Depth 2 # Child Loop BB3_87 Depth 2 # Child Loop BB3_91 Depth 2 - addi.d $s6, $s7, -1 - slli.d $s4, $s6, 2 + addi.d $s4, $s7, -1 + slli.d $s6, $s4, 2 ld.d $a0, $sp, 344 # 8-byte Folded Reload - fldx.s $fa0, $a0, $s4 + fldx.s $fa0, $a0, $s6 move $s3, $s2 move $s2, $s5 fst.s $fa0, $s5, 0 @@ -1722,7 +1693,7 @@ A__align: # @A__align ld.d $a4, $sp, 264 # 8-byte Folded Reload fld.s $fa3, $a4, 4 ld.d $a4, $sp, 240 # 8-byte Folded Reload - fldx.s $fa0, $a4, $s4 + fldx.s $fa0, $a4, $s6 ld.d $a4, $sp, 256 # 8-byte Folded Reload ldx.d $a4, $a4, $a0 ld.d $a0, $sp, 232 # 8-byte Folded Reload @@ -1790,7 +1761,7 @@ A__align: # @A__align bcnez $fcc0, .LBB3_90 # %bb.96: # in Loop: Header=BB3_91 Depth=2 fst.s $fa6, $t0, 0 - st.w $s6, $a7, 0 + st.w $s4, $a7, 0 b .LBB3_90 .LBB3_97: # %vector.main.loop.iter.check790 # in Loop: Header=BB3_82 Depth=1 @@ -1898,19 +1869,29 @@ A__align: # @A__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI3_2) - xvld $xr1, $a5, %pc_lo12(.LCPI3_2) pcalau12i $a5, %pc_hi20(.LCPI3_3) - xvld $xr2, $a5, %pc_lo12(.LCPI3_3) - xvreplgr2vr.d $xr3, $t3 + xvld $xr1, $a5, %pc_lo12(.LCPI3_3) + pcalau12i $a5, %pc_hi20(.LCPI3_4) + xvld $xr2, $a5, %pc_lo12(.LCPI3_4) + pcalau12i $a5, %pc_hi20(.LCPI3_1) + xvld $xr3, $a5, %pc_lo12(.LCPI3_1) + xvreplgr2vr.d $xr4, $t3 addi.d $a5, $s5, 4 - xvldi $xr4, -800 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB3_112: # %vector.body840 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr3, $xr1 - xvsub.d $xr6, $xr3, $xr2 + xvsub.d $xr6, $xr4, $xr1 + xvsub.d $xr7, $xr4, $xr2 + xvpickve2gr.d $a7, $xr7, 0 + vinsgr2vr.w $vr8, $a7, 0 + xvpickve2gr.d $a7, $xr7, 1 + vinsgr2vr.w $vr8, $a7, 1 + xvpickve2gr.d $a7, $xr7, 2 + vinsgr2vr.w $vr8, $a7, 2 + xvpickve2gr.d $a7, $xr7, 3 + vinsgr2vr.w $vr8, $a7, 3 xvpickve2gr.d $a7, $xr6, 0 vinsgr2vr.w $vr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -1919,73 +1900,44 @@ A__align: # @A__align vinsgr2vr.w $vr7, $a7, 2 xvpickve2gr.d $a7, $xr6, 3 vinsgr2vr.w $vr7, $a7, 3 - xvpickve2gr.d $a7, $xr5, 0 - vinsgr2vr.w $vr6, $a7, 0 - xvpickve2gr.d $a7, $xr5, 1 - vinsgr2vr.w $vr6, $a7, 1 - xvpickve2gr.d $a7, $xr5, 2 - vinsgr2vr.w $vr6, $a7, 2 - xvpickve2gr.d $a7, $xr5, 3 - vinsgr2vr.w $vr6, $a7, 3 - xvpermi.q $xr7, $xr6, 2 - xvmul.w $xr5, $xr0, $xr7 - xvpermi.q $xr6, $xr5, 1 + xvpermi.q $xr8, $xr7, 2 + xvmul.w $xr6, $xr0, $xr8 + xvpermi.q $xr7, $xr6, 1 + vext2xv.d.w $xr7, $xr7 + xvffint.d.l $xr7, $xr7 vext2xv.d.w $xr6, $xr6 + xvld $xr8, $a5, 0 xvffint.d.l $xr6, $xr6 - vext2xv.d.w $xr5, $xr5 - xvld $xr7, $a5, 0 - xvffint.d.l $xr5, $xr5 - xvfmul.d $xr5, $xr5, $xr4 - xvfmul.d $xr6, $xr6, $xr4 - xvpermi.q $xr8, $xr7, 1 - vreplvei.w $vr9, $vr8, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr8, 2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvpermi.q $xr9, $xr8, 1 + vreplvei.w $vr10, $vr9, 3 fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr8, 1 + vreplvei.w $vr11, $vr9, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr9, 1 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr9, $vr9, 0 fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr10, 16 + xvpermi.q $xr9, $xr11, 2 + vreplvei.w $vr10, $vr8, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr8, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr8, 1 + fcvt.d.s $ft2, $ft2 vreplvei.w $vr8, $vr8, 0 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr9, 16 - xvpermi.q $xr8, $xr10, 2 - vreplvei.w $vr9, $vr7, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr7, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr7, 1 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr9, 16 - xvpermi.q $xr7, $xr10, 2 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr8, $xr11, 2 + xvfadd.d $xr7, $xr9, $xr7 xvfadd.d $xr6, $xr8, $xr6 - xvfadd.d $xr5, $xr7, $xr5 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $a5, 0 + xvfcvt.s.d $xr6, $xr7, $xr6 + xvperm.w $xr6, $xr6, $xr3 + xvst $xr6, $a5, 0 xvaddi.du $xr2, $xr2, 8 xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 @@ -2042,8 +1994,8 @@ A__align: # @A__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - pcalau12i $a5, %pc_hi20(.LCPI3_4) - vld $vr3, $a5, %pc_lo12(.LCPI3_4) + pcalau12i $a5, %pc_hi20(.LCPI3_5) + vld $vr3, $a5, %pc_lo12(.LCPI3_5) xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 xvldi $xr5, -800 @@ -2068,18 +2020,9 @@ A__align: # @A__align vextrins.d $vr7, $vr8, 16 xvpermi.q $xr7, $xr9, 2 xvfmadd.d $xr6, $xr4, $xr6, $xr7 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - vst $vr8, $a5, 0 + xvpermi.q $xr7, $xr6, 1 + vfcvt.s.d $vr6, $vr7, $vr6 + vst $vr6, $a5, 0 vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 @@ -2248,8 +2191,8 @@ A__align: # @A__align move $a3, $zero b .LBB3_146 .LBB3_143: # %vector.ph871 - pcalau12i $a4, %pc_hi20(.LCPI3_5) - vld $vr0, $a4, %pc_lo12(.LCPI3_5) + pcalau12i $a4, %pc_hi20(.LCPI3_6) + vld $vr0, $a4, %pc_lo12(.LCPI3_6) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s8, 16 @@ -2310,123 +2253,83 @@ A__align: # @A__align .LBB3_153: # %vector.ph bstrpick.d $a3, $s2, 30, 3 slli.d $a3, $a3, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a4, %pc_hi20(.LCPI3_1) + xvld $xr1, $a4, %pc_lo12(.LCPI3_1) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 .p2align 4, , 16 .LBB3_154: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a4, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a4, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a5, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a4, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a5, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a4, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a5, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a5, 0 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 addi.d $a4, $a4, 32 @@ -2437,128 +2340,88 @@ A__align: # @A__align .LBB3_156: # %vector.ph546 bstrpick.d $a3, $s0, 30, 3 slli.d $a3, $a3, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a4, %pc_hi20(.LCPI3_1) + xvld $xr1, $a4, %pc_lo12(.LCPI3_1) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 .p2align 4, , 16 .LBB3_157: # %vector.body551 # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a4, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a4, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a5, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a4, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a5, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a4, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a5, 0 - addi.d $a6, $a6, -8 - addi.d $a5, $a5, 32 - addi.d $a4, $a4, 32 - bnez $a6, .LBB3_157 -# %bb.158: # %middle.block556 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a5, 0 + addi.d $a6, $a6, -8 + addi.d $a5, $a5, 32 + addi.d $a4, $a4, 32 + bnez $a6, .LBB3_157 +# %bb.158: # %middle.block556 beq $a1, $a3, .LBB3_37 b .LBB3_35 .LBB3_159: # %vector.memcheck630 @@ -2620,8 +2483,8 @@ A__align: # @A__align andi $a7, $a6, 12 bstrpick.d $a5, $a6, 31, 4 slli.d $a5, $a5, 4 - pcalau12i $t0, %pc_hi20(.LCPI3_6) - xvld $xr0, $t0, %pc_lo12(.LCPI3_6) + pcalau12i $t0, %pc_hi20(.LCPI3_7) + xvld $xr0, $t0, %pc_lo12(.LCPI3_7) addi.d $t0, $a3, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -2643,8 +2506,8 @@ A__align: # @A__align beqz $a7, .LBB3_177 .LBB3_174: # %vec.epilog.ph905 move $a7, $a5 - pcalau12i $a5, %pc_hi20(.LCPI3_7) - vld $vr0, $a5, %pc_lo12(.LCPI3_7) + pcalau12i $a5, %pc_hi20(.LCPI3_8) + vld $vr0, $a5, %pc_lo12(.LCPI3_8) bstrpick.d $a5, $a6, 31, 2 slli.d $a5, $a5, 2 vreplgr2vr.w $vr1, $a7 @@ -4338,6 +4201,15 @@ Atracking: # @Atracking .section .rodata.cst32,"aM",@progbits,32 .p2align 5, 0x0 .LCPI6_1: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI6_2: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 @@ -4346,17 +4218,17 @@ Atracking: # @Atracking .word 6 # 0x6 .word 7 # 0x7 .word 8 # 0x8 -.LCPI6_2: +.LCPI6_3: .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 .dword 8 # 0x8 -.LCPI6_3: +.LCPI6_4: .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI6_6: +.LCPI6_7: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -4367,15 +4239,15 @@ Atracking: # @Atracking .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI6_4: +.LCPI6_5: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI6_5: +.LCPI6_6: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI6_7: +.LCPI6_8: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -4405,8 +4277,8 @@ A__align_gapmap: # @A__align_gapmap st.d $a6, $sp, 32 # 8-byte Folded Spill move $s6, $a5 move $s1, $a4 - st.d $a3, $sp, 232 # 8-byte Folded Spill - st.d $a2, $sp, 208 # 8-byte Folded Spill + st.d $a3, $sp, 208 # 8-byte Folded Spill + st.d $a2, $sp, 200 # 8-byte Folded Spill move $s0, $a1 move $s2, $a0 pcalau12i $a0, %pc_hi20(A__align_gapmap.mseq1) @@ -4472,9 +4344,9 @@ A__align_gapmap: # @A__align_gapmap pcalau12i $a0, %pc_hi20(A__align_gapmap.cpmx2) st.d $a0, $sp, 256 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(A__align_gapmap.floatwork) - st.d $a0, $sp, 224 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(A__align_gapmap.intwork) - st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill st.d $s6, $sp, 48 # 8-byte Folded Spill st.d $s1, $sp, 104 # 8-byte Folded Spill st.d $fp, $sp, 72 # 8-byte Folded Spill @@ -4543,11 +4415,11 @@ A__align_gapmap: # @A__align_gapmap ld.d $a0, $a0, %pc_lo12(A__align_gapmap.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(A__align_gapmap.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(A__align_gapmap.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 @@ -4580,7 +4452,7 @@ A__align_gapmap: # @A__align_gapmap maskeqz $a1, $s3, $a1 or $s3, $a1, $a0 addi.w $s1, $s3, 100 - st.d $s2, $sp, 200 # 8-byte Folded Spill + st.d $s2, $sp, 144 # 8-byte Folded Spill move $s2, $s5 addi.w $s5, $s3, 102 move $a0, $s5 @@ -4666,16 +4538,16 @@ A__align_gapmap: # @A__align_gapmap move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(A__align_gapmap.floatwork) ori $a1, $zero, 26 move $a0, $s5 move $s5, $s2 - ld.d $s2, $sp, 200 # 8-byte Folded Reload + ld.d $s2, $sp, 144 # 8-byte Folded Reload pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 move $a5, $fp - ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(A__align_gapmap.intwork) st.w $s4, $s5, %pc_lo12(A__align_gapmap.orlgth1) st.w $s3, $s2, %pc_lo12(A__align_gapmap.orlgth2) @@ -4793,7 +4665,7 @@ A__align_gapmap: # @A__align_gapmap addi.w $a3, $a0, 0 move $a0, $s0 move $a1, $s5 - ld.d $s2, $sp, 208 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload move $a2, $s2 ld.d $fp, $sp, 104 # 8-byte Folded Reload move $a4, $fp @@ -4807,7 +4679,7 @@ A__align_gapmap: # @A__align_gapmap addi.w $a3, $a0, 0 move $a0, $s7 move $a1, $s5 - ld.d $s5, $sp, 232 # 8-byte Folded Reload + ld.d $s5, $sp, 208 # 8-byte Folded Reload move $a2, $s5 move $a4, $s6 pcaddu18i $ra, %call36(cpmx_calc_new) @@ -4946,32 +4818,31 @@ A__align_gapmap: # @A__align_gapmap ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(A__align_gapmap.initverticalw) ld.d $a1, $sp, 256 # 8-byte Folded Reload - ld.d $s6, $a1, %pc_lo12(A__align_gapmap.cpmx2) + ld.d $s0, $a1, %pc_lo12(A__align_gapmap.cpmx2) ld.d $a1, $sp, 280 # 8-byte Folded Reload - ld.d $s0, $a1, %pc_lo12(A__align_gapmap.cpmx1) + ld.d $s4, $a1, %pc_lo12(A__align_gapmap.cpmx1) + ld.d $a1, $sp, 232 # 8-byte Folded Reload + ld.d $s3, $a1, %pc_lo12(A__align_gapmap.floatwork) ld.d $a1, $sp, 224 # 8-byte Folded Reload - ld.d $s4, $a1, %pc_lo12(A__align_gapmap.floatwork) - ld.d $a1, $sp, 144 # 8-byte Folded Reload - ld.d $s3, $a1, %pc_lo12(A__align_gapmap.intwork) + ld.d $s6, $a1, %pc_lo12(A__align_gapmap.intwork) ori $a7, $zero, 1 st.d $a0, $sp, 280 # 8-byte Folded Spill - move $a1, $s6 - move $a2, $s0 + move $a1, $s0 + move $a2, $s4 move $a3, $zero move $a4, $fp - move $a5, $s4 - move $a6, $s3 + move $a5, $s3 + move $a6, $s6 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 pcalau12i $a1, %pc_hi20(impmtx) ld.d $a0, $sp, 216 # 8-byte Folded Reload slli.d $a0, $a0, 32 - st.d $a0, $sp, 224 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill ld.d $a0, $sp, 264 # 8-byte Folded Reload - st.d $s4, $sp, 256 # 8-byte Folded Spill + st.d $s4, $sp, 248 # 8-byte Folded Spill st.d $a1, $sp, 120 # 8-byte Folded Spill - st.d $s6, $sp, 232 # 8-byte Folded Spill - st.d $s0, $sp, 248 # 8-byte Folded Spill + st.d $s0, $sp, 256 # 8-byte Folded Spill beqz $a0, .LBB6_44 # %bb.38: move $a0, $fp @@ -5003,11 +4874,11 @@ A__align_gapmap: # @A__align_gapmap ori $s0, $zero, 1 move $a0, $s5 ld.d $a1, $sp, 248 # 8-byte Folded Reload - move $a2, $s6 + ld.d $a2, $sp, 256 # 8-byte Folded Reload move $a3, $zero move $a4, $s8 - ld.d $a5, $sp, 256 # 8-byte Folded Reload - move $a6, $s3 + move $a5, $s3 + move $a6, $s6 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 beqz $s8, .LBB6_53 @@ -5038,12 +4909,12 @@ A__align_gapmap: # @A__align_gapmap .LBB6_44: # %.critedge ori $a7, $zero, 1 move $a0, $s5 - move $a1, $s0 - move $a2, $s6 + move $a1, $s4 + move $a2, $s0 move $a3, $zero move $a4, $s8 - move $a5, $s4 - move $a6, $s3 + move $a5, $s3 + move $a6, $s6 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 .LBB6_45: # %imp_match_out_vead_gapmap.exit @@ -5071,73 +4942,54 @@ A__align_gapmap: # @A__align_gapmap ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 + pcalau12i $a5, %pc_hi20(.LCPI6_2) + xvld $xr0, $a5, %pc_lo12(.LCPI6_2) pcalau12i $a5, %pc_hi20(.LCPI6_1) - xvld $xr0, $a5, %pc_lo12(.LCPI6_1) - xvreplgr2vr.w $xr1, $a0 + xvld $xr1, $a5, %pc_lo12(.LCPI6_1) + xvreplgr2vr.w $xr2, $a0 addi.d $a5, $s5, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_49: # %vector.body544 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -5254,74 +5106,55 @@ A__align_gapmap: # @A__align_gapmap ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 + pcalau12i $a5, %pc_hi20(.LCPI6_2) + xvld $xr0, $a5, %pc_lo12(.LCPI6_2) pcalau12i $a5, %pc_hi20(.LCPI6_1) - xvld $xr0, $a5, %pc_lo12(.LCPI6_1) - xvreplgr2vr.w $xr1, $a0 + xvld $xr1, $a5, %pc_lo12(.LCPI6_1) + xvreplgr2vr.w $xr2, $a0 ld.d $a5, $sp, 280 # 8-byte Folded Reload addi.d $a5, $a5, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_65: # %vector.body558 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 @@ -5392,8 +5225,7 @@ A__align_gapmap: # @A__align_gapmap ori $a2, $zero, 1 beqz $s8, .LBB6_152 .LBB6_75: - ld.d $a5, $sp, 256 # 8-byte Folded Reload - ld.d $a3, $sp, 224 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload ori $a0, $zero, 0 lu32i.d $a0, -1 add.d $a0, $a3, $a0 @@ -5414,6 +5246,7 @@ A__align_gapmap: # @A__align_gapmap blt $s2, $a1, .LBB6_92 # %bb.77: # %.lr.ph382 st.d $a2, $sp, 224 # 8-byte Folded Spill + st.d $s6, $sp, 232 # 8-byte Folded Spill move $s6, $s3 ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) @@ -5439,7 +5272,7 @@ A__align_gapmap: # @A__align_gapmap ori $a2, $zero, 0 lu32i.d $a2, -1 add.d $a2, $a3, $a2 - srai.d $s1, $a2, 30 + srai.d $fp, $a2, 30 addi.d $a0, $a0, 4 st.d $a0, $sp, 152 # 8-byte Folded Spill addi.d $a0, $a1, 4 @@ -5454,27 +5287,27 @@ A__align_gapmap: # @A__align_gapmap .LBB6_78: # %._crit_edge374 # in Loop: Header=BB6_79 Depth=1 st.d $s5, $sp, 272 # 8-byte Folded Spill - fldx.s $fa0, $s3, $s1 + fldx.s $fa0, $s3, $fp addi.d $s4, $s4, 1 fstx.s $fa0, $s0, $a0 move $s5, $s3 - ld.d $a5, $sp, 256 # 8-byte Folded Reload beq $s4, $s2, .LBB6_93 .LBB6_79: # =>This Loop Header: Depth=1 # Child Loop BB6_82 Depth 2 # Child Loop BB6_86 Depth 2 - addi.d $fp, $s4, -1 - slli.d $s7, $fp, 2 + addi.d $s1, $s4, -1 + slli.d $s7, $s1, 2 ld.d $a0, $sp, 280 # 8-byte Folded Reload fldx.s $fa0, $a0, $s7 ld.d $s3, $sp, 272 # 8-byte Folded Reload fst.s $fa0, $s5, 0 move $a0, $s3 ld.d $a1, $sp, 248 # 8-byte Folded Reload - ld.d $a2, $sp, 232 # 8-byte Folded Reload + ld.d $a2, $sp, 256 # 8-byte Folded Reload move $a3, $s4 move $a4, $s8 - move $a6, $s6 + move $a5, $s6 + ld.d $a6, $sp, 232 # 8-byte Folded Reload move $a7, $zero pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 @@ -5593,7 +5426,7 @@ A__align_gapmap: # @A__align_gapmap bcnez $fcc0, .LBB6_85 # %bb.91: # in Loop: Header=BB6_86 Depth=2 fst.s $fa6, $t0, 0 - st.w $fp, $a7, 0 + st.w $s1, $a7, 0 b .LBB6_85 .LBB6_92: movgr2fr.w $fs0, $zero @@ -5633,20 +5466,30 @@ A__align_gapmap: # @A__align_gapmap move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI6_2) - xvld $xr1, $a5, %pc_lo12(.LCPI6_2) pcalau12i $a5, %pc_hi20(.LCPI6_3) - xvld $xr2, $a5, %pc_lo12(.LCPI6_3) + xvld $xr1, $a5, %pc_lo12(.LCPI6_3) + pcalau12i $a5, %pc_hi20(.LCPI6_4) + xvld $xr2, $a5, %pc_lo12(.LCPI6_4) + pcalau12i $a5, %pc_hi20(.LCPI6_1) + xvld $xr3, $a5, %pc_lo12(.LCPI6_1) ld.d $a5, $sp, 216 # 8-byte Folded Reload - xvreplgr2vr.d $xr3, $a5 + xvreplgr2vr.d $xr4, $a5 addi.d $a5, $s5, 4 - xvldi $xr4, -800 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB6_97: # %vector.body716 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr3, $xr1 - xvsub.d $xr6, $xr3, $xr2 + xvsub.d $xr6, $xr4, $xr1 + xvsub.d $xr7, $xr4, $xr2 + xvpickve2gr.d $a7, $xr7, 0 + vinsgr2vr.w $vr8, $a7, 0 + xvpickve2gr.d $a7, $xr7, 1 + vinsgr2vr.w $vr8, $a7, 1 + xvpickve2gr.d $a7, $xr7, 2 + vinsgr2vr.w $vr8, $a7, 2 + xvpickve2gr.d $a7, $xr7, 3 + vinsgr2vr.w $vr8, $a7, 3 xvpickve2gr.d $a7, $xr6, 0 vinsgr2vr.w $vr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -5655,73 +5498,44 @@ A__align_gapmap: # @A__align_gapmap vinsgr2vr.w $vr7, $a7, 2 xvpickve2gr.d $a7, $xr6, 3 vinsgr2vr.w $vr7, $a7, 3 - xvpickve2gr.d $a7, $xr5, 0 - vinsgr2vr.w $vr6, $a7, 0 - xvpickve2gr.d $a7, $xr5, 1 - vinsgr2vr.w $vr6, $a7, 1 - xvpickve2gr.d $a7, $xr5, 2 - vinsgr2vr.w $vr6, $a7, 2 - xvpickve2gr.d $a7, $xr5, 3 - vinsgr2vr.w $vr6, $a7, 3 - xvpermi.q $xr7, $xr6, 2 - xvmul.w $xr5, $xr0, $xr7 - xvpermi.q $xr6, $xr5, 1 + xvpermi.q $xr8, $xr7, 2 + xvmul.w $xr6, $xr0, $xr8 + xvpermi.q $xr7, $xr6, 1 + vext2xv.d.w $xr7, $xr7 + xvffint.d.l $xr7, $xr7 vext2xv.d.w $xr6, $xr6 + xvld $xr8, $a5, 0 xvffint.d.l $xr6, $xr6 - vext2xv.d.w $xr5, $xr5 - xvld $xr7, $a5, 0 - xvffint.d.l $xr5, $xr5 - xvfmul.d $xr5, $xr5, $xr4 - xvfmul.d $xr6, $xr6, $xr4 - xvpermi.q $xr8, $xr7, 1 - vreplvei.w $vr9, $vr8, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr8, 2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvpermi.q $xr9, $xr8, 1 + vreplvei.w $vr10, $vr9, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr9, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr9, 1 fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr8, 1 + vreplvei.w $vr9, $vr9, 0 fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr10, 16 + xvpermi.q $xr9, $xr11, 2 + vreplvei.w $vr10, $vr8, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr8, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr8, 1 + fcvt.d.s $ft2, $ft2 vreplvei.w $vr8, $vr8, 0 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr9, 16 - xvpermi.q $xr8, $xr10, 2 - vreplvei.w $vr9, $vr7, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr7, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr7, 1 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr9, 16 - xvpermi.q $xr7, $xr10, 2 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr8, $xr11, 2 + xvfadd.d $xr7, $xr9, $xr7 xvfadd.d $xr6, $xr8, $xr6 - xvfadd.d $xr5, $xr7, $xr5 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $a5, 0 + xvfcvt.s.d $xr6, $xr7, $xr6 + xvperm.w $xr6, $xr6, $xr3 + xvst $xr6, $a5, 0 xvaddi.du $xr2, $xr2, 8 xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 @@ -5777,8 +5591,8 @@ A__align_gapmap: # @A__align_gapmap move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - pcalau12i $a5, %pc_hi20(.LCPI6_4) - vld $vr3, $a5, %pc_lo12(.LCPI6_4) + pcalau12i $a5, %pc_hi20(.LCPI6_5) + vld $vr3, $a5, %pc_lo12(.LCPI6_5) xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 xvldi $xr5, -800 @@ -5803,18 +5617,9 @@ A__align_gapmap: # @A__align_gapmap vextrins.d $vr7, $vr8, 16 xvpermi.q $xr7, $xr9, 2 xvfmadd.d $xr6, $xr4, $xr6, $xr7 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - vst $vr8, $a5, 0 + xvpermi.q $xr7, $xr6, 1 + vfcvt.s.d $vr6, $vr7, $vr6 + vst $vr6, $a5, 0 vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 @@ -5981,8 +5786,8 @@ A__align_gapmap: # @A__align_gapmap move $a3, $zero b .LBB6_131 .LBB6_128: # %vector.ph747 - pcalau12i $a4, %pc_hi20(.LCPI6_5) - vld $vr0, $a4, %pc_lo12(.LCPI6_5) + pcalau12i $a4, %pc_hi20(.LCPI6_6) + vld $vr0, $a4, %pc_lo12(.LCPI6_6) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s2, 16 @@ -6043,119 +5848,79 @@ A__align_gapmap: # @A__align_gapmap b .LBB6_157 .LBB6_138: # %vector.ph bstrpick.d $a3, $s1, 30, 3 + pcalau12i $a4, %pc_hi20(.LCPI6_1) + xvld $xr0, $a4, %pc_lo12(.LCPI6_1) slli.d $a3, $a3, 3 - xvldi $xr0, -912 - xvldi $xr1, -928 + xvldi $xr1, -912 + xvldi $xr2, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 .p2align 4, , 16 .LBB6_139: # %vector.body # =>This Inner Loop Header: Depth=1 - xvld $xr2, $a4, 0 - xvpermi.q $xr3, $xr2, 1 - vreplvei.w $vr4, $vr3, 3 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr3, 2 + xvld $xr3, $a4, 0 + xvpermi.q $xr4, $xr3, 1 + vreplvei.w $vr5, $vr4, 3 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr3, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr3, $vr3, 0 - fcvt.d.s $fa3, $fa3 - vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr3, $xr5, 2 - vreplvei.w $vr4, $vr2, 3 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr2, 2 + vreplvei.w $vr6, $vr4, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr5, 16 + vreplvei.w $vr5, $vr4, 1 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr2, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr2, $vr2, 0 - fcvt.d.s $fa2, $fa2 - vextrins.d $vr2, $vr4, 16 - xvpermi.q $xr2, $xr5, 2 - xvfsub.d $xr3, $xr0, $xr3 - xvfsub.d $xr2, $xr0, $xr2 - xvfmul.d $xr3, $xr3, $xr1 - xvfmul.d $xr2, $xr2, $xr1 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpickve.d $xr3, $xr2, 1 - fcvt.s.d $fa3, $fa3 - xvpickve.d $xr4, $xr2, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 2 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr4, $vr3, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - xvld $xr3, $a5, 0 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr4, $xr5, 2 - xvst $xr4, $a4, 0 - xvpermi.q $xr2, $xr3, 1 - vreplvei.w $vr4, $vr2, 3 + vreplvei.w $vr4, $vr4, 0 fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr2, 2 + vextrins.d $vr4, $vr5, 16 + xvpermi.q $xr4, $xr6, 2 + vreplvei.w $vr5, $vr3, 3 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr2, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr2, $vr2, 0 - fcvt.d.s $fa2, $fa2 - vextrins.d $vr2, $vr4, 16 - xvpermi.q $xr2, $xr5, 2 + vreplvei.w $vr6, $vr3, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr5, 16 + vreplvei.w $vr5, $vr3, 1 + fcvt.d.s $fa5, $fa5 + vreplvei.w $vr3, $vr3, 0 + fcvt.d.s $fa3, $fa3 + vextrins.d $vr3, $vr5, 16 + xvpermi.q $xr3, $xr6, 2 + xvfsub.d $xr4, $xr1, $xr4 + xvfsub.d $xr3, $xr1, $xr3 + xvfmul.d $xr4, $xr4, $xr2 + xvfmul.d $xr3, $xr3, $xr2 + xvld $xr5, $a5, 0 + xvfcvt.s.d $xr3, $xr4, $xr3 + xvperm.w $xr3, $xr3, $xr0 + xvst $xr3, $a4, 0 + xvpermi.q $xr3, $xr5, 1 vreplvei.w $vr4, $vr3, 3 fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr3, 2 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 + vreplvei.w $vr6, $vr3, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr4, 16 vreplvei.w $vr4, $vr3, 1 fcvt.d.s $fa4, $fa4 vreplvei.w $vr3, $vr3, 0 fcvt.d.s $fa3, $fa3 vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr3, $xr5, 2 - xvfsub.d $xr2, $xr0, $xr2 - xvfsub.d $xr3, $xr0, $xr3 - xvfmul.d $xr2, $xr2, $xr1 - xvfmul.d $xr3, $xr3, $xr1 - xvpickve.d $xr4, $xr2, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr2, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr2, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr5, $vr2, 48 - xvpickve.d $xr2, $xr3, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr4, $xr3, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr2, 16 - xvpickve.d $xr2, $xr3, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 32 - xvpickve.d $xr2, $xr3, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr4, $xr5, 2 - xvst $xr4, $a5, 0 + xvpermi.q $xr3, $xr6, 2 + vreplvei.w $vr4, $vr5, 3 + fcvt.d.s $fa4, $fa4 + vreplvei.w $vr6, $vr5, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr4, 16 + vreplvei.w $vr4, $vr5, 1 + fcvt.d.s $fa4, $fa4 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr6, 2 + xvfsub.d $xr3, $xr1, $xr3 + xvfsub.d $xr4, $xr1, $xr5 + xvfmul.d $xr3, $xr3, $xr2 + xvfmul.d $xr4, $xr4, $xr2 + xvfcvt.s.d $xr3, $xr3, $xr4 + xvperm.w $xr3, $xr3, $xr0 + xvst $xr3, $a5, 0 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 addi.d $a4, $a4, 32 @@ -6166,119 +5931,79 @@ A__align_gapmap: # @A__align_gapmap .LBB6_141: # %vector.ph528 ld.d $a3, $sp, 216 # 8-byte Folded Reload bstrpick.d $a3, $a3, 30, 3 + pcalau12i $a4, %pc_hi20(.LCPI6_1) + xvld $xr0, $a4, %pc_lo12(.LCPI6_1) slli.d $a3, $a3, 3 - xvldi $xr0, -912 - xvldi $xr1, -928 + xvldi $xr1, -912 + xvldi $xr2, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 .p2align 4, , 16 .LBB6_142: # %vector.body531 # =>This Inner Loop Header: Depth=1 - xvld $xr2, $a4, 0 - xvpermi.q $xr3, $xr2, 1 - vreplvei.w $vr4, $vr3, 3 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr3, 2 + xvld $xr3, $a4, 0 + xvpermi.q $xr4, $xr3, 1 + vreplvei.w $vr5, $vr4, 3 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr3, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr3, $vr3, 0 - fcvt.d.s $fa3, $fa3 - vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr3, $xr5, 2 - vreplvei.w $vr4, $vr2, 3 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr2, 2 + vreplvei.w $vr6, $vr4, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr5, 16 + vreplvei.w $vr5, $vr4, 1 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr2, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr2, $vr2, 0 - fcvt.d.s $fa2, $fa2 - vextrins.d $vr2, $vr4, 16 - xvpermi.q $xr2, $xr5, 2 - xvfsub.d $xr3, $xr0, $xr3 - xvfsub.d $xr2, $xr0, $xr2 - xvfmul.d $xr3, $xr3, $xr1 - xvfmul.d $xr2, $xr2, $xr1 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpickve.d $xr3, $xr2, 1 - fcvt.s.d $fa3, $fa3 - xvpickve.d $xr4, $xr2, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 2 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr4, $vr3, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - xvld $xr3, $a5, 0 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr4, $xr5, 2 - xvst $xr4, $a4, 0 - xvpermi.q $xr2, $xr3, 1 - vreplvei.w $vr4, $vr2, 3 + vreplvei.w $vr4, $vr4, 0 fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr2, 2 + vextrins.d $vr4, $vr5, 16 + xvpermi.q $xr4, $xr6, 2 + vreplvei.w $vr5, $vr3, 3 fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 - vreplvei.w $vr4, $vr2, 1 - fcvt.d.s $fa4, $fa4 - vreplvei.w $vr2, $vr2, 0 - fcvt.d.s $fa2, $fa2 - vextrins.d $vr2, $vr4, 16 - xvpermi.q $xr2, $xr5, 2 + vreplvei.w $vr6, $vr3, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr5, 16 + vreplvei.w $vr5, $vr3, 1 + fcvt.d.s $fa5, $fa5 + vreplvei.w $vr3, $vr3, 0 + fcvt.d.s $fa3, $fa3 + vextrins.d $vr3, $vr5, 16 + xvpermi.q $xr3, $xr6, 2 + xvfsub.d $xr4, $xr1, $xr4 + xvfsub.d $xr3, $xr1, $xr3 + xvfmul.d $xr4, $xr4, $xr2 + xvfmul.d $xr3, $xr3, $xr2 + xvld $xr5, $a5, 0 + xvfcvt.s.d $xr3, $xr4, $xr3 + xvperm.w $xr3, $xr3, $xr0 + xvst $xr3, $a4, 0 + xvpermi.q $xr3, $xr5, 1 vreplvei.w $vr4, $vr3, 3 fcvt.d.s $fa4, $fa4 - vreplvei.w $vr5, $vr3, 2 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr4, 16 + vreplvei.w $vr6, $vr3, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr4, 16 vreplvei.w $vr4, $vr3, 1 fcvt.d.s $fa4, $fa4 vreplvei.w $vr3, $vr3, 0 fcvt.d.s $fa3, $fa3 vextrins.d $vr3, $vr4, 16 - xvpermi.q $xr3, $xr5, 2 - xvfsub.d $xr2, $xr0, $xr2 - xvfsub.d $xr3, $xr0, $xr3 - xvfmul.d $xr2, $xr2, $xr1 - xvfmul.d $xr3, $xr3, $xr1 - xvpickve.d $xr4, $xr2, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr2, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr2, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr2, $xr2, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr5, $vr2, 48 - xvpickve.d $xr2, $xr3, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr4, $xr3, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr2, 16 - xvpickve.d $xr2, $xr3, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 32 - xvpickve.d $xr2, $xr3, 3 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - xvpermi.q $xr4, $xr5, 2 - xvst $xr4, $a5, 0 + xvpermi.q $xr3, $xr6, 2 + vreplvei.w $vr4, $vr5, 3 + fcvt.d.s $fa4, $fa4 + vreplvei.w $vr6, $vr5, 2 + fcvt.d.s $fa6, $fa6 + vextrins.d $vr6, $vr4, 16 + vreplvei.w $vr4, $vr5, 1 + fcvt.d.s $fa4, $fa4 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr4, 16 + xvpermi.q $xr5, $xr6, 2 + xvfsub.d $xr3, $xr1, $xr3 + xvfsub.d $xr4, $xr1, $xr5 + xvfmul.d $xr3, $xr3, $xr2 + xvfmul.d $xr4, $xr4, $xr2 + xvfcvt.s.d $xr3, $xr3, $xr4 + xvperm.w $xr3, $xr3, $xr0 + xvst $xr3, $a5, 0 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 addi.d $a4, $a4, 32 @@ -6333,15 +6058,14 @@ A__align_gapmap: # @A__align_gapmap movgr2fr.w $fa0, $zero ori $a0, $zero, 1 st.d $a0, $sp, 240 # 8-byte Folded Spill - ld.d $a5, $sp, 256 # 8-byte Folded Reload - ld.d $a3, $sp, 224 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload b .LBB6_76 .LBB6_153: # %vector.ph766 andi $a7, $a6, 12 bstrpick.d $a5, $a6, 31, 4 slli.d $a5, $a5, 4 - pcalau12i $t0, %pc_hi20(.LCPI6_6) - xvld $xr0, $t0, %pc_lo12(.LCPI6_6) + pcalau12i $t0, %pc_hi20(.LCPI6_7) + xvld $xr0, $t0, %pc_lo12(.LCPI6_7) addi.d $t0, $a3, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -6363,8 +6087,8 @@ A__align_gapmap: # @A__align_gapmap beqz $a7, .LBB6_160 .LBB6_157: # %vec.epilog.ph781 move $a7, $a5 - pcalau12i $a5, %pc_hi20(.LCPI6_7) - vld $vr0, $a5, %pc_lo12(.LCPI6_7) + pcalau12i $a5, %pc_hi20(.LCPI6_8) + vld $vr0, $a5, %pc_lo12(.LCPI6_8) bstrpick.d $a5, $a6, 31, 2 slli.d $a5, $a5, 2 vreplgr2vr.w $vr1, $a7 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s index dddd0c5..05d7a18 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s @@ -9481,9 +9481,9 @@ fastsupg: # @fastsupg .LBB29_2: # %.preheader173.lr.ph move $a0, $zero ld.d $a2, $s4, %pc_lo12(fastsupg.eff) - bstrpick.d $a3, $fp, 30, 3 - slli.d $a3, $a3, 3 - ori $a4, $zero, 8 + bstrpick.d $a3, $fp, 30, 2 + slli.d $a3, $a3, 2 + ori $a4, $zero, 4 b .LBB29_4 .p2align 4, , 16 .LBB29_3: # %._crit_edge.us @@ -9504,42 +9504,20 @@ fastsupg: # @fastsupg .p2align 4, , 16 .LBB29_6: # %vector.body.preheader # in Loop: Header=BB29_4 Depth=1 - addi.d $a7, $a6, 16 - addi.d $t0, $a5, 32 + addi.d $a7, $a6, 8 + addi.d $t0, $a5, 16 move $t1, $a3 .p2align 4, , 16 .LBB29_7: # %vector.body # Parent Loop BB29_4 Depth=1 # => This Inner Loop Header: Depth=2 - xvld $xr0, $t0, -32 - xvld $xr1, $t0, 0 - xvpickve.d $xr2, $xr0, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr3, $xr0, 0 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr3, $vr2, 16 - xvpickve.d $xr2, $xr0, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr3, $vr2, 32 - xvpickve.d $xr0, $xr0, 3 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr3, $vr0, 48 - xvpickve.d $xr0, $xr1, 1 - fcvt.s.d $fa0, $fa0 - xvpickve.d $xr2, $xr1, 0 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr2, $vr0, 16 - xvpickve.d $xr0, $xr1, 2 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr2, $vr0, 32 - xvpickve.d $xr0, $xr1, 3 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr2, $vr0, 48 - vst $vr3, $a7, -16 - vst $vr2, $a7, 0 - addi.d $t1, $t1, -8 - addi.d $a7, $a7, 32 - addi.d $t0, $t0, 64 + vld $vr0, $t0, -16 + vld $vr1, $t0, 0 + vfcvt.s.d $vr0, $vr1, $vr0 + vst $vr0, $a7, -8 + addi.d $t1, $t1, -4 + addi.d $a7, $a7, 16 + addi.d $t0, $t0, 32 bnez $t1, .LBB29_7 # %bb.8: # %middle.block # in Loop: Header=BB29_4 Depth=1 @@ -10062,9 +10040,9 @@ supg: # @supg # %bb.3: # %.preheader161.lr.ph move $a0, $zero ld.d $a1, $s3, %pc_lo12(supg.eff) - bstrpick.d $a2, $s1, 30, 3 - slli.d $a2, $a2, 3 - ori $a3, $zero, 8 + bstrpick.d $a2, $s1, 30, 2 + slli.d $a2, $a2, 2 + ori $a3, $zero, 4 b .LBB30_5 .p2align 4, , 16 .LBB30_4: # %._crit_edge.us @@ -10085,42 +10063,20 @@ supg: # @supg .p2align 4, , 16 .LBB30_7: # %vector.body.preheader # in Loop: Header=BB30_5 Depth=1 - addi.d $a6, $a5, 16 - addi.d $a7, $a4, 32 + addi.d $a6, $a5, 8 + addi.d $a7, $a4, 16 move $t0, $a2 .p2align 4, , 16 .LBB30_8: # %vector.body # Parent Loop BB30_5 Depth=1 # => This Inner Loop Header: Depth=2 - xvld $xr0, $a7, -32 - xvld $xr1, $a7, 0 - xvpickve.d $xr2, $xr0, 1 - fcvt.s.d $fa2, $fa2 - xvpickve.d $xr3, $xr0, 0 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr3, $vr2, 16 - xvpickve.d $xr2, $xr0, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr3, $vr2, 32 - xvpickve.d $xr0, $xr0, 3 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr3, $vr0, 48 - xvpickve.d $xr0, $xr1, 1 - fcvt.s.d $fa0, $fa0 - xvpickve.d $xr2, $xr1, 0 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr2, $vr0, 16 - xvpickve.d $xr0, $xr1, 2 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr2, $vr0, 32 - xvpickve.d $xr0, $xr1, 3 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr2, $vr0, 48 - vst $vr3, $a6, -16 - vst $vr2, $a6, 0 - addi.d $t0, $t0, -8 - addi.d $a6, $a6, 32 - addi.d $a7, $a7, 64 + vld $vr0, $a7, -16 + vld $vr1, $a7, 0 + vfcvt.s.d $vr0, $vr1, $vr0 + vst $vr0, $a6, -8 + addi.d $t0, $t0, -4 + addi.d $a6, $a6, 16 + addi.d $a7, $a7, 32 bnez $t0, .LBB30_8 # %bb.9: # %middle.block # in Loop: Header=BB30_5 Depth=1 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s index 64ad4e7..6ff3337 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s @@ -970,6 +970,15 @@ part_imp_match_initQ: # @part_imp_match_initQ .section .rodata.cst32,"aM",@progbits,32 .p2align 5, 0x0 .LCPI4_1: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI4_2: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 @@ -978,17 +987,17 @@ part_imp_match_initQ: # @part_imp_match_initQ .word 6 # 0x6 .word 7 # 0x7 .word 8 # 0x8 -.LCPI4_2: +.LCPI4_3: .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 .dword 8 # 0x8 -.LCPI4_3: +.LCPI4_4: .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI4_6: +.LCPI4_7: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -999,15 +1008,15 @@ part_imp_match_initQ: # @part_imp_match_initQ .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI4_4: +.LCPI4_5: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI4_5: +.LCPI4_6: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI4_7: +.LCPI4_8: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -1036,14 +1045,14 @@ partQ__align: # @partQ__align st.d $a6, $sp, 32 # 8-byte Folded Spill move $s8, $a5 st.d $a4, $sp, 408 # 8-byte Folded Spill - st.d $a3, $sp, 232 # 8-byte Folded Spill - move $s4, $a2 + st.d $a3, $sp, 224 # 8-byte Folded Spill + st.d $a2, $sp, 344 # 8-byte Folded Spill move $s1, $a1 move $s2, $a0 pcalau12i $a0, %got_pc_hi20(penalty) ld.d $a0, $a0, %got_pc_lo12(penalty) pcalau12i $a1, %pc_hi20(partQ__align.orlgth1) - st.d $a1, $sp, 224 # 8-byte Folded Spill + st.d $a1, $sp, 216 # 8-byte Folded Spill ld.w $s7, $a1, %pc_lo12(partQ__align.orlgth1) ld.w $a0, $a0, 0 st.d $a0, $sp, 40 # 8-byte Folded Spill @@ -1066,7 +1075,7 @@ partQ__align: # @partQ__align move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 216 # 8-byte Folded Reload ld.w $s7, $a1, %pc_lo12(partQ__align.orlgth1) ld.d $a1, $sp, 120 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.mseq2) @@ -1082,29 +1091,29 @@ partQ__align: # @partQ__align jirl $ra, $ra, 0 move $s5, $a0 pcalau12i $a0, %pc_hi20(partQ__align.orlgth2) - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill ld.w $s0, $a0, %pc_lo12(partQ__align.orlgth2) addi.w $a1, $s5, 0 pcalau12i $s6, %pc_hi20(partQ__align.w1) pcalau12i $a0, %pc_hi20(partQ__align.w2) - st.d $a0, $sp, 336 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(partQ__align.initverticalw) st.d $a0, $sp, 328 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(partQ__align.initverticalw) + st.d $a0, $sp, 320 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.lastverticalw) st.d $a0, $sp, 136 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.m) - st.d $a0, $sp, 344 # 8-byte Folded Spill + st.d $a0, $sp, 336 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.mp) st.d $a0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a7, %pc_hi20(partQ__align.mseq) + pcalau12i $s4, %pc_hi20(partQ__align.mseq) pcalau12i $a0, %pc_hi20(partQ__align.digf1) st.d $a0, $sp, 376 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.digf2) st.d $a0, $sp, 352 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.diaf1) - st.d $a0, $sp, 240 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.diaf2) - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.gapz1) st.d $a0, $sp, 392 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.gapz2) @@ -1146,22 +1155,21 @@ partQ__align: # @partQ__align pcalau12i $a0, %pc_hi20(partQ__align.cpmx2) st.d $a0, $sp, 368 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.floatwork) - st.d $a0, $sp, 320 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(partQ__align.intwork) st.d $a0, $sp, 312 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(partQ__align.intwork) + st.d $a0, $sp, 272 # 8-byte Folded Spill st.d $s1, $sp, 144 # 8-byte Folded Spill - st.d $s2, $sp, 416 # 8-byte Folded Spill st.d $s3, $sp, 152 # 8-byte Folded Spill st.d $s5, $sp, 48 # 8-byte Folded Spill st.d $a1, $sp, 384 # 8-byte Folded Spill st.d $fp, $sp, 128 # 8-byte Folded Spill - st.d $s6, $sp, 216 # 8-byte Folded Spill + st.d $s6, $sp, 24 # 8-byte Folded Spill blt $s7, $s3, .LBB4_4 # %bb.3: bge $s0, $a1, .LBB4_8 .LBB4_4: - move $s3, $a7 - pcalau12i $s2, %pc_hi20(partQ__align.match) + st.d $s4, $sp, 416 # 8-byte Folded Spill + pcalau12i $s3, %pc_hi20(partQ__align.match) blez $s7, .LBB4_7 # %bb.5: blez $s0, .LBB4_7 @@ -1169,14 +1177,14 @@ partQ__align: # @partQ__align ld.d $a0, $s6, %pc_lo12(partQ__align.w1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.w2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $s2, %pc_lo12(partQ__align.match) + ld.d $a0, $s3, %pc_lo12(partQ__align.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -1184,7 +1192,7 @@ partQ__align: # @partQ__align ld.d $a0, $a0, %pc_lo12(partQ__align.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.m) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -1192,7 +1200,8 @@ partQ__align: # @partQ__align ld.d $a0, $a0, %pc_lo12(partQ__align.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $s3, %pc_lo12(partQ__align.mseq) + ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(partQ__align.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 ld.d $a0, $sp, 376 # 8-byte Folded Reload @@ -1203,11 +1212,11 @@ partQ__align: # @partQ__align ld.d $a0, $a0, %pc_lo12(partQ__align.digf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.diaf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.diaf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -1291,17 +1300,17 @@ partQ__align: # @partQ__align ld.d $a0, $a0, %pc_lo12(partQ__align.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.w $s7, $a0, %pc_lo12(partQ__align.orlgth1) - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.w $s0, $a0, %pc_lo12(partQ__align.orlgth2) .LBB4_7: pcalau12i $a0, %pc_hi20(.LCPI4_0) @@ -1334,17 +1343,17 @@ partQ__align: # @partQ__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.w2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $s2, %pc_lo12(partQ__align.match) + st.d $a0, $s3, %pc_lo12(partQ__align.match) addi.w $s6, $s7, 102 move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 328 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.initverticalw) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -1354,7 +1363,7 @@ partQ__align: # @partQ__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 336 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.m) move $a0, $s5 pcaddu18i $ra, %call36(AllocateIntVec) @@ -1367,8 +1376,8 @@ partQ__align: # @partQ__align add.w $a1, $s1, $fp pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - move $s2, $s3 - st.d $a0, $s3, %pc_lo12(partQ__align.mseq) + ld.d $s4, $sp, 416 # 8-byte Folded Reload + st.d $a0, $s4, %pc_lo12(partQ__align.mseq) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -1382,12 +1391,12 @@ partQ__align: # @partQ__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.diaf1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 240 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.diaf2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -1500,26 +1509,24 @@ partQ__align: # @partQ__align move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 320 # 8-byte Folded Reload + ld.d $a1, $sp, 312 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.floatwork) ori $a1, $zero, 26 move $a0, $s3 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - move $a7, $s2 - ld.d $a1, $sp, 312 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.intwork) - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload st.w $s7, $a0, %pc_lo12(partQ__align.orlgth1) - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload st.w $s0, $a0, %pc_lo12(partQ__align.orlgth2) - ld.d $s2, $sp, 416 # 8-byte Folded Reload .LBB4_8: ld.d $a4, $sp, 408 # 8-byte Folded Reload - ld.d $s3, $sp, 232 # 8-byte Folded Reload + ld.d $s3, $sp, 224 # 8-byte Folded Reload blez $a4, .LBB4_16 # %bb.9: # %.lr.ph - ld.d $a0, $a7, %pc_lo12(partQ__align.mseq) + ld.d $a0, $s4, %pc_lo12(partQ__align.mseq) ld.d $a1, $sp, 112 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partQ__align.mseq1) ori $a3, $zero, 8 @@ -1564,7 +1571,7 @@ partQ__align: # @partQ__align .LBB4_16: # %.preheader643 blez $s8, .LBB4_22 # %bb.17: # %.lr.ph647 - ld.d $a0, $a7, %pc_lo12(partQ__align.mseq) + ld.d $a0, $s4, %pc_lo12(partQ__align.mseq) ld.d $a1, $sp, 120 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partQ__align.mseq2) ori $a2, $zero, 8 @@ -1592,13 +1599,14 @@ partQ__align: # @partQ__align addi.d $a1, $a1, 8 bnez $a2, .LBB4_21 .LBB4_22: # %._crit_edge - ld.d $s5, $sp, 584 + ld.d $s4, $sp, 584 pcalau12i $a0, %got_pc_hi20(commonAlloc1) ld.d $fp, $a0, %got_pc_lo12(commonAlloc1) ld.w $a0, $fp, 0 pcalau12i $a1, %got_pc_hi20(commonAlloc2) ld.d $s1, $a1, %got_pc_lo12(commonAlloc2) ld.w $a1, $s1, 0 + st.d $s2, $sp, 416 # 8-byte Folded Spill blt $a0, $s7, .LBB4_25 # %bb.23: # %._crit_edge blt $a1, $s0, .LBB4_25 @@ -1617,10 +1625,10 @@ partQ__align: # @partQ__align ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.w $s7, $a0, %pc_lo12(partQ__align.orlgth1) ld.w $a0, $fp, 0 - ld.d $a1, $sp, 24 # 8-byte Folded Reload + ld.d $a1, $sp, 16 # 8-byte Folded Reload ld.w $s0, $a1, %pc_lo12(partQ__align.orlgth2) ld.w $a1, $s1, 0 .LBB4_28: @@ -1652,10 +1660,10 @@ partQ__align: # @partQ__align st.d $a2, $sp, 40 # 8-byte Folded Spill st.d $a0, $a2, %pc_lo12(partQ__align.ijp) move $a0, $s2 - move $a2, $s4 + ld.d $a2, $sp, 344 # 8-byte Folded Reload ld.d $s6, $sp, 152 # 8-byte Folded Reload move $a3, $s6 - move $s1, $a4 + move $s7, $a4 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 ld.d $a0, $sp, 368 # 8-byte Folded Reload @@ -1663,26 +1671,27 @@ partQ__align: # @partQ__align ld.d $fp, $sp, 144 # 8-byte Folded Reload move $a0, $fp move $a2, $s3 - ld.d $s0, $sp, 384 # 8-byte Folded Reload - move $a3, $s0 + ld.d $s1, $sp, 384 # 8-byte Folded Reload + move $a3, $s1 move $a4, $s8 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp1g) - beqz $s5, .LBB4_31 + beqz $s4, .LBB4_31 # %bb.30: + move $a1, $s7 ld.d $s0, $sp, 608 - move $s7, $s4 - move $a5, $s5 - st.d $s5, $sp, 16 # 8-byte Folded Spill - ld.d $a6, $sp, 600 + move $a5, $s4 + st.d $s4, $sp, 8 # 8-byte Folded Spill + ld.d $s7, $sp, 344 # 8-byte Folded Reload + ld.d $s4, $sp, 600 ld.d $s6, $sp, 592 - move $a1, $s1 + move $s1, $a1 move $a2, $s2 - move $a3, $s4 + move $a3, $s7 ld.d $a4, $sp, 152 # 8-byte Folded Reload - move $s4, $a6 + move $a6, $s4 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 ld.d $a0, $sp, 208 # 8-byte Folded Reload @@ -1703,9 +1712,9 @@ partQ__align: # @partQ__align move $a2, $s2 move $a3, $s7 ld.d $a4, $sp, 152 # 8-byte Folded Reload - ld.d $s0, $sp, 16 # 8-byte Folded Reload + ld.d $s0, $sp, 8 # 8-byte Folded Reload move $a5, $s0 - st.d $s4, $sp, 224 # 8-byte Folded Spill + st.d $s4, $sp, 216 # 8-byte Folded Spill move $a6, $s4 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 @@ -1736,37 +1745,40 @@ partQ__align: # @partQ__align move $a3, $s5 ld.d $a4, $sp, 384 # 8-byte Folded Reload move $a5, $s6 + move $s5, $s3 move $a6, $s3 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.diaf1) move $s4, $s1 - move $a1, $s1 + ld.d $s1, $sp, 384 # 8-byte Folded Reload + move $a1, $s4 move $a2, $s2 + move $s3, $s7 move $a3, $s7 ld.d $a4, $sp, 152 # 8-byte Folded Reload move $a5, $s0 - ld.d $a6, $sp, 224 # 8-byte Folded Reload + ld.d $a6, $sp, 216 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.diaf2) move $a1, $s8 move $a2, $fp - ld.d $s1, $sp, 232 # 8-byte Folded Reload - move $a3, $s1 - ld.d $a4, $sp, 384 # 8-byte Folded Reload + ld.d $s7, $sp, 224 # 8-byte Folded Reload + move $a3, $s7 + move $a4, $s1 move $a5, $s6 ld.d $s6, $sp, 152 # 8-byte Folded Reload - move $a6, $s3 + move $a6, $s5 pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapf1) move $a1, $s4 move $a2, $s2 - move $a3, $s7 + move $a3, $s3 move $a4, $s6 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 @@ -1774,34 +1786,33 @@ partQ__align: # @partQ__align ld.d $a0, $a0, %pc_lo12(partQ__align.gapf2) move $a1, $s8 move $a2, $fp - move $a3, $s1 - ld.d $a4, $sp, 384 # 8-byte Folded Reload + move $a3, $s7 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz1) move $a1, $s4 move $a2, $s2 - move $a3, $s7 + move $a3, $s3 move $a4, $s6 - move $s3, $s0 - ld.d $s0, $sp, 384 # 8-byte Folded Reload - move $a5, $s3 + move $a5, $s0 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz2) move $a1, $s8 move $a2, $fp - move $a3, $s1 - move $a4, $s0 - move $a5, $s3 + move $a3, $s7 + move $a4, $s1 + move $a5, $s0 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 b .LBB4_32 .LBB4_31: - move $a1, $s1 + move $a1, $s7 move $a2, $s2 + ld.d $s4, $sp, 344 # 8-byte Folded Reload move $a3, $s4 move $a4, $s6 pcaddu18i $ra, %call36(st_OpeningGapCount) @@ -1811,12 +1822,12 @@ partQ__align: # @partQ__align move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fgcp1g) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 @@ -1827,12 +1838,12 @@ partQ__align: # @partQ__align move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.digf1) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 @@ -1843,28 +1854,28 @@ partQ__align: # @partQ__align move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.diaf1) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.diaf2) move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapf1) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 @@ -1875,12 +1886,12 @@ partQ__align: # @partQ__align move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz1) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 @@ -1891,15 +1902,15 @@ partQ__align: # @partQ__align move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 .LBB4_32: ld.d $s7, $sp, 128 # 8-byte Folded Reload ld.d $s3, $sp, 48 # 8-byte Folded Reload addi.w $ra, $zero, -1 - ffint.s.w $ft11, $fs0 - blt $s0, $ra, .LBB4_37 + ffint.s.w $ft10, $fs0 + blt $s1, $ra, .LBB4_37 # %bb.33: # %.lr.ph650 ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp2g) @@ -1922,7 +1933,7 @@ partQ__align: # @partQ__align addi.d $t3, $s3, 2 bstrpick.d $t1, $t3, 31, 0 ori $t2, $zero, 24 - fcvt.d.s $fa0, $ft11 + fcvt.d.s $fa0, $ft10 bgeu $t1, $t2, .LBB4_79 # %bb.34: move $t2, $zero @@ -2019,7 +2030,7 @@ partQ__align: # @partQ__align addi.d $t3, $s7, 2 bstrpick.d $t1, $t3, 31, 0 ori $t2, $zero, 24 - fcvt.d.s $fa0, $ft11 + fcvt.d.s $fa0, $ft10 bgeu $t1, $t2, .LBB4_112 # %bb.39: move $t2, $zero @@ -2093,8 +2104,8 @@ partQ__align: # @partQ__align addi.d $a0, $a0, 4 bnez $t1, .LBB4_41 .LBB4_42: # %._crit_edge655 - xvst $xr19, $sp, 240 # 32-byte Folded Spill - st.d $ra, $sp, 24 # 8-byte Folded Spill + xvst $xr18, $sp, 240 # 32-byte Folded Spill + st.d $ra, $sp, 16 # 8-byte Folded Spill ld.d $a0, $sp, 576 st.d $a0, $sp, 288 # 8-byte Folded Spill ld.d $a0, $sp, 568 @@ -2102,20 +2113,20 @@ partQ__align: # @partQ__align ld.d $a0, $sp, 552 st.d $a0, $sp, 376 # 8-byte Folded Spill ld.d $s0, $sp, 536 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $s7, $a0, %pc_lo12(partQ__align.w1) - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.w2) st.d $a0, $sp, 392 # 8-byte Folded Spill - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(partQ__align.initverticalw) ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $s6, $a0, %pc_lo12(partQ__align.cpmx2) ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $s5, $a0, %pc_lo12(partQ__align.cpmx1) - ld.d $a0, $sp, 320 # 8-byte Folded Reload - ld.d $fp, $a0, %pc_lo12(partQ__align.floatwork) ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $fp, $a0, %pc_lo12(partQ__align.floatwork) + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $s1, $a0, %pc_lo12(partQ__align.intwork) ori $a7, $zero, 1 st.d $a1, $sp, 400 # 8-byte Folded Spill @@ -2223,7 +2234,7 @@ partQ__align: # @partQ__align ori $a3, $zero, 1 slli.d $a1, $t8, 32 lu12i.w $a0, 287172 - xvld $xr9, $sp, 240 # 32-byte Folded Reload + xvld $xr10, $sp, 240 # 32-byte Folded Reload bne $a2, $a3, .LBB4_60 # %bb.51: ld.d $a2, $sp, 304 # 8-byte Folded Reload @@ -2340,7 +2351,7 @@ partQ__align: # @partQ__align addi.d $a7, $a7, 4 bnez $a5, .LBB4_58 .LBB4_59: # %.loopexit639.thread832 - ld.d $a2, $sp, 344 # 8-byte Folded Reload + ld.d $a2, $sp, 336 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(partQ__align.m) st.w $zero, $a2, 0 b .LBB4_75 @@ -2365,73 +2376,54 @@ partQ__align: # @partQ__align ori $a7, $zero, 1 move $a5, $a4 bstrins.d $a5, $a7, 2, 0 + pcalau12i $a7, %pc_hi20(.LCPI4_2) + xvld $xr0, $a7, %pc_lo12(.LCPI4_2) pcalau12i $a7, %pc_hi20(.LCPI4_1) - xvld $xr0, $a7, %pc_lo12(.LCPI4_1) - xvreplgr2vr.w $xr1, $a2 + xvld $xr1, $a7, %pc_lo12(.LCPI4_1) + xvreplgr2vr.w $xr2, $a2 addi.d $a7, $t6, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $t0, $a6 .p2align 4, , 16 .LBB4_63: # %vector.body1166 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a7, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a7, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a7, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a7, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $t0, $t0, -8 addi.d $a7, $a7, 32 @@ -2476,74 +2468,55 @@ partQ__align: # @partQ__align ori $a7, $zero, 1 move $a5, $a4 bstrins.d $a5, $a7, 2, 0 + pcalau12i $a7, %pc_hi20(.LCPI4_2) + xvld $xr0, $a7, %pc_lo12(.LCPI4_2) pcalau12i $a7, %pc_hi20(.LCPI4_1) - xvld $xr0, $a7, %pc_lo12(.LCPI4_1) - xvreplgr2vr.w $xr1, $a2 + xvld $xr1, $a7, %pc_lo12(.LCPI4_1) + xvreplgr2vr.w $xr2, $a2 ld.d $a7, $sp, 400 # 8-byte Folded Reload addi.d $a7, $a7, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $t0, $a6 .p2align 4, , 16 .LBB4_70: # %vector.body1180 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a7, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a7, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a7, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a7, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $t0, $t0, -8 addi.d $a7, $a7, 32 @@ -2572,7 +2545,7 @@ partQ__align: # @partQ__align addi.d $a6, $a6, 4 bnez $a3, .LBB4_73 .LBB4_74: # %.loopexit639 - ld.d $a2, $sp, 344 # 8-byte Folded Reload + ld.d $a2, $sp, 336 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(partQ__align.m) st.w $zero, $a2, 0 blez $t5, .LBB4_146 @@ -2764,9 +2737,11 @@ partQ__align: # @partQ__align bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 - xvldi $xr4, -1424 + pcalau12i $t3, %pc_hi20(.LCPI4_1) + xvld $xr2, $t3, %pc_lo12(.LCPI4_1) + xvldi $xr3, -912 + xvldi $xr4, -928 + xvldi $xr5, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 @@ -2780,209 +2755,125 @@ partQ__align: # @partQ__align .p2align 4, , 16 .LBB4_110: # %vector.body989 # =>This Inner Loop Header: Depth=1 - xvld $xr6, $t3, 0 - xvpermi.q $xr5, $xr6, 1 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 + xvld $xr7, $t3, 0 + xvpermi.q $xr6, $xr7, 1 + vreplvei.w $vr8, $vr6, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 - fcvt.d.s $fa7, $fa7 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvld $xr11, $t4, 0 - xvpermi.q $xr6, $xr8, 2 - xvfsub.d $xr9, $xr2, $xr5 - xvfsub.d $xr10, $xr2, $xr6 - xvpermi.q $xr7, $xr11, 1 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr7, 2 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 vextrins.d $vr7, $vr8, 16 - xvpermi.q $xr7, $xr12, 2 - vreplvei.w $vr8, $vr11, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr11, 2 + xvld $xr11, $t4, 0 + xvpermi.q $xr7, $xr9, 2 + xvfsub.d $xr8, $xr3, $xr6 + xvfsub.d $xr9, $xr3, $xr7 + xvpermi.q $xr10, $xr11, 1 + vreplvei.w $vr12, $vr10, 3 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 - vreplvei.w $vr8, $vr11, 1 - fcvt.d.s $ft5, $ft0 - vreplvei.w $vr8, $vr11, 0 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr13, 16 - xvpermi.q $xr8, $xr12, 2 - xvfsub.d $xr11, $xr9, $xr7 - xvfsub.d $xr12, $xr10, $xr8 - xvfmul.d $xr11, $xr11, $xr1 - xvfmul.d $xr12, $xr12, $xr1 - xvfmul.d $xr11, $xr11, $xr3 - xvfmul.d $xr12, $xr12, $xr3 - xvpickve.d $xr13, $xr11, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr11, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr11, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr11, $xr11, 3 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr14, $vr11, 48 - xvpickve.d $xr11, $xr12, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr13, $xr12, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr11, 16 - xvpickve.d $xr11, $xr12, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 32 - xvpickve.d $xr11, $xr12, 3 - fcvt.s.d $ft3, $ft3 - xvld $xr12, $t6, 0 - vextrins.w $vr13, $vr11, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t5, 0 - xvpermi.q $xr11, $xr12, 1 - vreplvei.w $vr13, $vr11, 3 + vreplvei.w $vr13, $vr10, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr11, 2 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr11, 1 - fcvt.d.s $ft7, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr10, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr12, 16 + xvpermi.q $xr10, $xr13, 2 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 vreplvei.w $vr11, $vr11, 0 - fcvt.d.s $ft5, $ft3 - vextrins.d $vr13, $vr15, 16 - xvpermi.q $xr13, $xr14, 2 - xvfsub.d $xr11, $xr2, $xr13 - xvfsub.d $xr14, $xr11, $xr7 - vreplvei.w $vr15, $vr12, 3 - fcvt.d.s $ft7, $ft7 - vreplvei.w $vr16, $vr12, 2 - fcvt.d.s $ft8, $ft8 - vextrins.d $vr16, $vr15, 16 - vreplvei.w $vr15, $vr12, 1 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfsub.d $xr12, $xr8, $xr10 + xvfsub.d $xr13, $xr9, $xr11 + xvfmul.d $xr12, $xr12, $xr1 + xvfmul.d $xr13, $xr13, $xr1 + xvfmul.d $xr12, $xr12, $xr4 + xvfmul.d $xr13, $xr13, $xr4 + xvld $xr14, $t6, 0 + xvfcvt.s.d $xr12, $xr12, $xr13 + xvperm.w $xr12, $xr12, $xr2 + xvst $xr12, $t5, 0 + xvpermi.q $xr12, $xr14, 1 + vreplvei.w $vr13, $vr12, 3 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr15, $vr12, 2 fcvt.d.s $ft7, $ft7 + vextrins.d $vr15, $vr13, 16 + vreplvei.w $vr13, $vr12, 1 + fcvt.d.s $ft5, $ft5 vreplvei.w $vr12, $vr12, 0 - fcvt.d.s $ft9, $ft4 - vextrins.d $vr17, $vr15, 16 - xvpermi.q $xr17, $xr16, 2 - xvfsub.d $xr12, $xr2, $xr17 - xvfsub.d $xr15, $xr12, $xr8 - xvfmul.d $xr14, $xr14, $xr1 + fcvt.d.s $ft4, $ft4 + vextrins.d $vr12, $vr13, 16 + xvpermi.q $xr12, $xr15, 2 + xvfsub.d $xr13, $xr3, $xr12 + xvfsub.d $xr15, $xr13, $xr10 + vreplvei.w $vr16, $vr14, 3 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr17, $vr14, 2 + fcvt.d.s $ft9, $ft9 + vextrins.d $vr17, $vr16, 16 + vreplvei.w $vr16, $vr14, 1 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr14, $vr14, 0 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr16, 16 + xvpermi.q $xr14, $xr17, 2 + xvfsub.d $xr16, $xr3, $xr14 + xvfsub.d $xr17, $xr16, $xr11 xvfmul.d $xr15, $xr15, $xr1 - xvfmul.d $xr14, $xr14, $xr3 - xvfmul.d $xr15, $xr15, $xr3 - xvpickve.d $xr16, $xr14, 1 - fcvt.s.d $ft8, $ft8 - xvpickve.d $xr18, $xr14, 0 - fcvt.s.d $ft10, $ft10 - vextrins.w $vr18, $vr16, 16 - xvpickve.d $xr16, $xr14, 2 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr18, $vr16, 32 - xvpickve.d $xr14, $xr14, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr18, $vr14, 48 - xvpickve.d $xr14, $xr15, 1 - fcvt.s.d $ft6, $ft6 - xvpickve.d $xr16, $xr15, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr16, $vr14, 16 - xvpickve.d $xr14, $xr15, 2 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 32 - xvpickve.d $xr14, $xr15, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 48 - xvpermi.q $xr16, $xr18, 2 - xvst $xr16, $t7, 0 - xvfadd.d $xr9, $xr9, $xr13 - xvfadd.d $xr10, $xr10, $xr17 - xvfsub.d $xr9, $xr9, $xr7 - xvfsub.d $xr10, $xr10, $xr8 - xvfmul.d $xr9, $xr9, $xr3 - xvfmul.d $xr10, $xr10, $xr3 + xvfmul.d $xr17, $xr17, $xr1 + xvfmul.d $xr15, $xr15, $xr4 + xvfmul.d $xr17, $xr17, $xr4 + xvfcvt.s.d $xr15, $xr15, $xr17 + xvperm.w $xr15, $xr15, $xr2 + xvst $xr15, $t7, 0 + xvfadd.d $xr8, $xr8, $xr12 + xvfadd.d $xr9, $xr9, $xr14 + xvfsub.d $xr8, $xr8, $xr10 + xvfsub.d $xr9, $xr9, $xr11 + xvfmul.d $xr8, $xr8, $xr4 + xvfmul.d $xr9, $xr9, $xr4 + xvfmul.d $xr8, $xr8, $xr1 xvfmul.d $xr9, $xr9, $xr1 - xvfmul.d $xr10, $xr10, $xr1 - xvpickve.d $xr13, $xr9, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr9, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr9, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr9, $xr9, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr14, $vr9, 48 - xvpickve.d $xr9, $xr10, 1 - fcvt.s.d $ft1, $ft1 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr9, 16 - xvpickve.d $xr9, $xr10, 2 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 32 - xvpickve.d $xr9, $xr10, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t8, 0 - xvfadd.d $xr5, $xr11, $xr5 - xvfadd.d $xr6, $xr12, $xr6 - xvfsub.d $xr5, $xr5, $xr7 - xvfsub.d $xr6, $xr6, $xr8 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr6, $xr6, $xr3 - xvfmul.d $xr5, $xr5, $xr1 + xvfcvt.s.d $xr8, $xr8, $xr9 + xvperm.w $xr8, $xr8, $xr2 + xvst $xr8, $t8, 0 + xvfadd.d $xr6, $xr13, $xr6 + xvfadd.d $xr7, $xr16, $xr7 + xvfsub.d $xr6, $xr6, $xr10 + xvfsub.d $xr7, $xr7, $xr11 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr7, $xr7, $xr4 xvfmul.d $xr6, $xr6, $xr1 - xvpickve.d $xr7, $xr5, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr5, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr5, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr8, $vr5, 48 - xvpickve.d $xr5, $xr6, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr7, $xr6, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr5, 16 - xvpickve.d $xr5, $xr6, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 32 - xvpickve.d $xr5, $xr6, 3 - fcvt.s.d $fa5, $fa5 - xvld $xr6, $s3, 0 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $s0, 0 - xvfsub.s $xr5, $xr4, $xr6 - xvst $xr5, $s4, 0 + xvfmul.d $xr7, $xr7, $xr1 + xvld $xr8, $s3, 0 + xvfcvt.s.d $xr6, $xr6, $xr7 + xvperm.w $xr6, $xr6, $xr2 + xvst $xr6, $s0, 0 + xvfsub.s $xr6, $xr5, $xr8 + xvst $xr6, $s4, 0 addi.d $s5, $s5, -8 addi.d $s4, $s4, 32 addi.d $s3, $s3, 32 @@ -3166,9 +3057,11 @@ partQ__align: # @partQ__align bstrpick.d $t2, $t3, 31, 3 slli.d $t2, $t2, 3 xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 - xvldi $xr4, -1424 + pcalau12i $t3, %pc_hi20(.LCPI4_1) + xvld $xr2, $t3, %pc_lo12(.LCPI4_1) + xvldi $xr3, -912 + xvldi $xr4, -928 + xvldi $xr5, -1424 move $t3, $a0 move $t4, $a1 move $t5, $a2 @@ -3182,209 +3075,125 @@ partQ__align: # @partQ__align .p2align 4, , 16 .LBB4_143: # %vector.body1142 # =>This Inner Loop Header: Depth=1 - xvld $xr6, $t3, 0 - xvpermi.q $xr5, $xr6, 1 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 + xvld $xr7, $t3, 0 + xvpermi.q $xr6, $xr7, 1 + vreplvei.w $vr8, $vr6, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 - fcvt.d.s $fa7, $fa7 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvld $xr11, $t4, 0 - xvpermi.q $xr6, $xr8, 2 - xvfsub.d $xr9, $xr2, $xr5 - xvfsub.d $xr10, $xr2, $xr6 - xvpermi.q $xr7, $xr11, 1 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr7, 2 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 vreplvei.w $vr8, $vr7, 1 fcvt.d.s $ft0, $ft0 vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 vextrins.d $vr7, $vr8, 16 - xvpermi.q $xr7, $xr12, 2 - vreplvei.w $vr8, $vr11, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr12, $vr11, 2 + xvld $xr11, $t4, 0 + xvpermi.q $xr7, $xr9, 2 + xvfsub.d $xr8, $xr3, $xr6 + xvfsub.d $xr9, $xr3, $xr7 + xvpermi.q $xr10, $xr11, 1 + vreplvei.w $vr12, $vr10, 3 fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr8, 16 - vreplvei.w $vr8, $vr11, 1 - fcvt.d.s $ft5, $ft0 - vreplvei.w $vr8, $vr11, 0 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr13, 16 - xvpermi.q $xr8, $xr12, 2 - xvfsub.d $xr11, $xr9, $xr7 - xvfsub.d $xr12, $xr10, $xr8 - xvfmul.d $xr11, $xr11, $xr1 - xvfmul.d $xr12, $xr12, $xr1 - xvfmul.d $xr11, $xr11, $xr3 - xvfmul.d $xr12, $xr12, $xr3 - xvpickve.d $xr13, $xr11, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr11, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr11, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr11, $xr11, 3 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr14, $vr11, 48 - xvpickve.d $xr11, $xr12, 1 - fcvt.s.d $ft3, $ft3 - xvpickve.d $xr13, $xr12, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr11, 16 - xvpickve.d $xr11, $xr12, 2 - fcvt.s.d $ft3, $ft3 - vextrins.w $vr13, $vr11, 32 - xvpickve.d $xr11, $xr12, 3 - fcvt.s.d $ft3, $ft3 - xvld $xr12, $t6, 0 - vextrins.w $vr13, $vr11, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t5, 0 - xvpermi.q $xr11, $xr12, 1 - vreplvei.w $vr13, $vr11, 3 + vreplvei.w $vr13, $vr10, 2 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr11, 2 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr11, 1 - fcvt.d.s $ft7, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr10, 1 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr12, 16 + xvpermi.q $xr10, $xr13, 2 + vreplvei.w $vr12, $vr11, 3 + fcvt.d.s $ft4, $ft4 + vreplvei.w $vr13, $vr11, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr12, 16 + vreplvei.w $vr12, $vr11, 1 + fcvt.d.s $ft4, $ft4 vreplvei.w $vr11, $vr11, 0 - fcvt.d.s $ft5, $ft3 - vextrins.d $vr13, $vr15, 16 - xvpermi.q $xr13, $xr14, 2 - xvfsub.d $xr11, $xr2, $xr13 - xvfsub.d $xr14, $xr11, $xr7 - vreplvei.w $vr15, $vr12, 3 - fcvt.d.s $ft7, $ft7 - vreplvei.w $vr16, $vr12, 2 - fcvt.d.s $ft8, $ft8 - vextrins.d $vr16, $vr15, 16 - vreplvei.w $vr15, $vr12, 1 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr12, 16 + xvpermi.q $xr11, $xr13, 2 + xvfsub.d $xr12, $xr8, $xr10 + xvfsub.d $xr13, $xr9, $xr11 + xvfmul.d $xr12, $xr12, $xr1 + xvfmul.d $xr13, $xr13, $xr1 + xvfmul.d $xr12, $xr12, $xr4 + xvfmul.d $xr13, $xr13, $xr4 + xvld $xr14, $t6, 0 + xvfcvt.s.d $xr12, $xr12, $xr13 + xvperm.w $xr12, $xr12, $xr2 + xvst $xr12, $t5, 0 + xvpermi.q $xr12, $xr14, 1 + vreplvei.w $vr13, $vr12, 3 + fcvt.d.s $ft5, $ft5 + vreplvei.w $vr15, $vr12, 2 fcvt.d.s $ft7, $ft7 + vextrins.d $vr15, $vr13, 16 + vreplvei.w $vr13, $vr12, 1 + fcvt.d.s $ft5, $ft5 vreplvei.w $vr12, $vr12, 0 - fcvt.d.s $ft9, $ft4 - vextrins.d $vr17, $vr15, 16 - xvpermi.q $xr17, $xr16, 2 - xvfsub.d $xr12, $xr2, $xr17 - xvfsub.d $xr15, $xr12, $xr8 - xvfmul.d $xr14, $xr14, $xr1 + fcvt.d.s $ft4, $ft4 + vextrins.d $vr12, $vr13, 16 + xvpermi.q $xr12, $xr15, 2 + xvfsub.d $xr13, $xr3, $xr12 + xvfsub.d $xr15, $xr13, $xr10 + vreplvei.w $vr16, $vr14, 3 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr17, $vr14, 2 + fcvt.d.s $ft9, $ft9 + vextrins.d $vr17, $vr16, 16 + vreplvei.w $vr16, $vr14, 1 + fcvt.d.s $ft8, $ft8 + vreplvei.w $vr14, $vr14, 0 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr16, 16 + xvpermi.q $xr14, $xr17, 2 + xvfsub.d $xr16, $xr3, $xr14 + xvfsub.d $xr17, $xr16, $xr11 xvfmul.d $xr15, $xr15, $xr1 - xvfmul.d $xr14, $xr14, $xr3 - xvfmul.d $xr15, $xr15, $xr3 - xvpickve.d $xr16, $xr14, 1 - fcvt.s.d $ft8, $ft8 - xvpickve.d $xr18, $xr14, 0 - fcvt.s.d $ft10, $ft10 - vextrins.w $vr18, $vr16, 16 - xvpickve.d $xr16, $xr14, 2 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr18, $vr16, 32 - xvpickve.d $xr14, $xr14, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr18, $vr14, 48 - xvpickve.d $xr14, $xr15, 1 - fcvt.s.d $ft6, $ft6 - xvpickve.d $xr16, $xr15, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr16, $vr14, 16 - xvpickve.d $xr14, $xr15, 2 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 32 - xvpickve.d $xr14, $xr15, 3 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr16, $vr14, 48 - xvpermi.q $xr16, $xr18, 2 - xvst $xr16, $t7, 0 - xvfadd.d $xr9, $xr9, $xr13 - xvfadd.d $xr10, $xr10, $xr17 - xvfsub.d $xr9, $xr9, $xr7 - xvfsub.d $xr10, $xr10, $xr8 - xvfmul.d $xr9, $xr9, $xr3 - xvfmul.d $xr10, $xr10, $xr3 + xvfmul.d $xr17, $xr17, $xr1 + xvfmul.d $xr15, $xr15, $xr4 + xvfmul.d $xr17, $xr17, $xr4 + xvfcvt.s.d $xr15, $xr15, $xr17 + xvperm.w $xr15, $xr15, $xr2 + xvst $xr15, $t7, 0 + xvfadd.d $xr8, $xr8, $xr12 + xvfadd.d $xr9, $xr9, $xr14 + xvfsub.d $xr8, $xr8, $xr10 + xvfsub.d $xr9, $xr9, $xr11 + xvfmul.d $xr8, $xr8, $xr4 + xvfmul.d $xr9, $xr9, $xr4 + xvfmul.d $xr8, $xr8, $xr1 xvfmul.d $xr9, $xr9, $xr1 - xvfmul.d $xr10, $xr10, $xr1 - xvpickve.d $xr13, $xr9, 1 - fcvt.s.d $ft5, $ft5 - xvpickve.d $xr14, $xr9, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr14, $vr13, 16 - xvpickve.d $xr13, $xr9, 2 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr14, $vr13, 32 - xvpickve.d $xr9, $xr9, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr14, $vr9, 48 - xvpickve.d $xr9, $xr10, 1 - fcvt.s.d $ft1, $ft1 - xvpickve.d $xr13, $xr10, 0 - fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr9, 16 - xvpickve.d $xr9, $xr10, 2 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 32 - xvpickve.d $xr9, $xr10, 3 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr13, $vr9, 48 - xvpermi.q $xr13, $xr14, 2 - xvst $xr13, $t8, 0 - xvfadd.d $xr5, $xr11, $xr5 - xvfadd.d $xr6, $xr12, $xr6 - xvfsub.d $xr5, $xr5, $xr7 - xvfsub.d $xr6, $xr6, $xr8 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr6, $xr6, $xr3 - xvfmul.d $xr5, $xr5, $xr1 + xvfcvt.s.d $xr8, $xr8, $xr9 + xvperm.w $xr8, $xr8, $xr2 + xvst $xr8, $t8, 0 + xvfadd.d $xr6, $xr13, $xr6 + xvfadd.d $xr7, $xr16, $xr7 + xvfsub.d $xr6, $xr6, $xr10 + xvfsub.d $xr7, $xr7, $xr11 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr7, $xr7, $xr4 xvfmul.d $xr6, $xr6, $xr1 - xvpickve.d $xr7, $xr5, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr5, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr5, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr8, $vr5, 48 - xvpickve.d $xr5, $xr6, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr7, $xr6, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr5, 16 - xvpickve.d $xr5, $xr6, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 32 - xvpickve.d $xr5, $xr6, 3 - fcvt.s.d $fa5, $fa5 - xvld $xr6, $s3, 0 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $s0, 0 - xvfsub.s $xr5, $xr4, $xr6 - xvst $xr5, $s4, 0 + xvfmul.d $xr7, $xr7, $xr1 + xvld $xr8, $s3, 0 + xvfcvt.s.d $xr6, $xr6, $xr7 + xvperm.w $xr6, $xr6, $xr2 + xvst $xr6, $s0, 0 + xvfsub.s $xr6, $xr5, $xr8 + xvst $xr6, $s4, 0 addi.d $s5, $s5, -8 addi.d $s4, $s4, 32 addi.d $s3, $s3, 32 @@ -3401,7 +3210,7 @@ partQ__align: # @partQ__align bne $t1, $t2, .LBB4_40 b .LBB4_42 .LBB4_145: # %.loopexit639.thread - ld.d $a2, $sp, 344 # 8-byte Folded Reload + ld.d $a2, $sp, 336 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(partQ__align.m) st.w $zero, $a2, 0 .LBB4_146: # %._crit_edge673 @@ -3537,7 +3346,7 @@ partQ__align: # @partQ__align andi $a7, $a5, 12 move $a6, $a5 bstrins.d $a6, $zero, 3, 0 - xvreplve0.w $xr0, $xr9 + xvreplve0.w $xr0, $xr10 addi.d $t1, $a3, 36 xvrepli.b $xr1, 0 xvreplgr2vr.w $xr2, $a0 @@ -3569,7 +3378,7 @@ partQ__align: # @partQ__align ori $t1, $zero, 1 move $a7, $a5 bstrins.d $a7, $t1, 1, 0 - vreplvei.w $vr0, $vr9, 0 + vreplvei.w $vr0, $vr10, 0 addi.d $t1, $a3, 4 slli.d $t2, $a6, 2 sub.d $a6, $a6, $t0 @@ -3598,7 +3407,7 @@ partQ__align: # @partQ__align # =>This Inner Loop Header: Depth=1 fldx.s $fa1, $a6, $a5 stx.w $zero, $a3, $a5 - fmadd.s $fa1, $ft1, $fa0, $fa1 + fmadd.s $fa1, $ft2, $fa0, $fa1 fstx.s $fa1, $a2, $a5 addi.d $a4, $a4, -1 addi.d $a5, $a5, 4 @@ -3629,7 +3438,7 @@ partQ__align: # @partQ__align st.d $fp, $sp, 328 # 8-byte Folded Spill st.d $s5, $sp, 336 # 8-byte Folded Spill st.d $s6, $sp, 344 # 8-byte Folded Spill - st.d $s8, $sp, 16 # 8-byte Folded Spill + st.d $s8, $sp, 24 # 8-byte Folded Spill ld.d $a3, $sp, 104 # 8-byte Folded Reload sltu $a3, $zero, $a3 slt $a4, $zero, $t5 @@ -3686,7 +3495,7 @@ partQ__align: # @partQ__align add.d $a1, $a1, $t2 srai.d $a1, $a1, 30 st.d $a1, $sp, 304 # 8-byte Folded Spill - addi.d $s4, $a2, 4 + addi.d $s2, $a2, 4 addi.d $s8, $a3, 4 addi.d $s0, $a4, 4 addi.d $fp, $a6, 4 @@ -3704,11 +3513,11 @@ partQ__align: # @partQ__align .LBB4_176: # %._crit_edge701 # in Loop: Header=BB4_177 Depth=1 ld.d $a2, $sp, 304 # 8-byte Folded Reload - fldx.s $fa0, $s2, $a2 + fldx.s $fa0, $s4, $a2 ld.d $a2, $sp, 360 # 8-byte Folded Reload fstx.s $fa0, $a2, $a0 move $s7, $a1 - move $t6, $s2 + move $t6, $s4 ld.d $a0, $sp, 352 # 8-byte Folded Reload beq $a1, $a0, .LBB4_190 .LBB4_177: # =>This Loop Header: Depth=1 @@ -3718,7 +3527,7 @@ partQ__align: # @partQ__align slli.d $a0, $a0, 2 ld.d $a1, $sp, 400 # 8-byte Folded Reload fldx.s $fa0, $a1, $a0 - move $s2, $t2 + move $s4, $t2 st.d $t6, $sp, 392 # 8-byte Folded Spill fst.s $fa0, $t6, 0 move $a0, $t2 @@ -3745,7 +3554,7 @@ partQ__align: # @partQ__align ld.d $a2, $sp, 232 # 8-byte Folded Reload ldx.d $a1, $a2, $a1 ld.d $a2, $sp, 288 # 8-byte Folded Reload - move $a3, $s2 + move $a3, $s4 ld.d $a4, $sp, 224 # 8-byte Folded Reload .p2align 4, , 16 .LBB4_179: # Parent Loop BB4_177 Depth=1 @@ -3765,7 +3574,7 @@ partQ__align: # @partQ__align # in Loop: Header=BB4_177 Depth=1 ld.d $a1, $sp, 400 # 8-byte Folded Reload fldx.s $fa0, $a1, $a0 - fst.s $fa0, $s2, 0 + fst.s $fa0, $s4, 0 addi.d $a1, $s7, 1 ld.d $t5, $sp, 384 # 8-byte Folded Reload ld.d $t2, $sp, 392 # 8-byte Folded Reload @@ -3800,7 +3609,7 @@ partQ__align: # @partQ__align fldx.s $fa7, $a5, $a0 xvld $xr9, $sp, 240 # 32-byte Folded Reload fmadd.s $ft0, $ft1, $fs1, $ft0 - addi.d $a5, $s2, 4 + addi.d $a5, $s4, 4 addi.d $a6, $a6, 4 addi.d $a7, $zero, -1 b .LBB4_183 @@ -3850,7 +3659,7 @@ partQ__align: # @partQ__align .LBB4_185: # in Loop: Header=BB4_183 Depth=2 add.d $t0, $t4, $a3 fld.s $ft4, $t0, 0 - fldx.s $ft3, $s4, $a3 + fldx.s $ft3, $s2, $a3 fldx.s $ft2, $fp, $a3 fmul.s $ft4, $fa0, $ft4 fadd.s $ft4, $ft3, $ft4 @@ -3868,7 +3677,7 @@ partQ__align: # @partQ__align fcmp.cult.s $fcc0, $ft4, $ft3 bcnez $fcc0, .LBB4_182 # %bb.188: # in Loop: Header=BB4_183 Depth=2 - fstx.s $ft4, $s4, $a3 + fstx.s $ft4, $s2, $a3 stx.w $t6, $s8, $a3 b .LBB4_182 .LBB4_189: @@ -3879,8 +3688,8 @@ partQ__align: # @partQ__align .LBB4_190: # %._crit_edge710.loopexit ld.d $a0, $sp, 8 # 8-byte Folded Reload ld.w $s0, $a0, 0 - move $t6, $s2 - ld.d $s8, $sp, 16 # 8-byte Folded Reload + move $t6, $s4 + ld.d $s8, $sp, 24 # 8-byte Folded Reload ld.d $s5, $sp, 144 # 8-byte Folded Reload ld.d $s2, $sp, 416 # 8-byte Folded Reload ld.d $t7, $sp, 128 # 8-byte Folded Reload @@ -3907,20 +3716,30 @@ partQ__align: # @partQ__align move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI4_2) - xvld $xr1, $a5, %pc_lo12(.LCPI4_2) pcalau12i $a5, %pc_hi20(.LCPI4_3) - xvld $xr2, $a5, %pc_lo12(.LCPI4_3) + xvld $xr1, $a5, %pc_lo12(.LCPI4_3) + pcalau12i $a5, %pc_hi20(.LCPI4_4) + xvld $xr2, $a5, %pc_lo12(.LCPI4_4) + pcalau12i $a5, %pc_hi20(.LCPI4_1) + xvld $xr3, $a5, %pc_lo12(.LCPI4_1) ld.d $a5, $sp, 48 # 8-byte Folded Reload - xvreplgr2vr.d $xr3, $a5 + xvreplgr2vr.d $xr4, $a5 addi.d $a5, $t6, 4 - xvldi $xr4, -800 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 .LBB4_194: # %vector.body1303 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr3, $xr1 - xvsub.d $xr6, $xr3, $xr2 + xvsub.d $xr6, $xr4, $xr1 + xvsub.d $xr7, $xr4, $xr2 + xvpickve2gr.d $a7, $xr7, 0 + vinsgr2vr.w $vr8, $a7, 0 + xvpickve2gr.d $a7, $xr7, 1 + vinsgr2vr.w $vr8, $a7, 1 + xvpickve2gr.d $a7, $xr7, 2 + vinsgr2vr.w $vr8, $a7, 2 + xvpickve2gr.d $a7, $xr7, 3 + vinsgr2vr.w $vr8, $a7, 3 xvpickve2gr.d $a7, $xr6, 0 vinsgr2vr.w $vr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -3929,73 +3748,44 @@ partQ__align: # @partQ__align vinsgr2vr.w $vr7, $a7, 2 xvpickve2gr.d $a7, $xr6, 3 vinsgr2vr.w $vr7, $a7, 3 - xvpickve2gr.d $a7, $xr5, 0 - vinsgr2vr.w $vr6, $a7, 0 - xvpickve2gr.d $a7, $xr5, 1 - vinsgr2vr.w $vr6, $a7, 1 - xvpickve2gr.d $a7, $xr5, 2 - vinsgr2vr.w $vr6, $a7, 2 - xvpickve2gr.d $a7, $xr5, 3 - vinsgr2vr.w $vr6, $a7, 3 - xvpermi.q $xr7, $xr6, 2 - xvmul.w $xr5, $xr0, $xr7 - xvpermi.q $xr6, $xr5, 1 + xvpermi.q $xr8, $xr7, 2 + xvmul.w $xr6, $xr0, $xr8 + xvpermi.q $xr7, $xr6, 1 + vext2xv.d.w $xr7, $xr7 + xvffint.d.l $xr7, $xr7 vext2xv.d.w $xr6, $xr6 + xvld $xr8, $a5, 0 xvffint.d.l $xr6, $xr6 - vext2xv.d.w $xr5, $xr5 - xvld $xr7, $a5, 0 - xvffint.d.l $xr5, $xr5 - xvfmul.d $xr5, $xr5, $xr4 - xvfmul.d $xr6, $xr6, $xr4 - xvpermi.q $xr8, $xr7, 1 - vreplvei.w $vr9, $vr8, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr8, 2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvpermi.q $xr9, $xr8, 1 + vreplvei.w $vr10, $vr9, 3 fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr8, 1 + vreplvei.w $vr11, $vr9, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr9, 1 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr9, $vr9, 0 fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr10, 16 + xvpermi.q $xr9, $xr11, 2 + vreplvei.w $vr10, $vr8, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr8, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr8, 1 + fcvt.d.s $ft2, $ft2 vreplvei.w $vr8, $vr8, 0 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr9, 16 - xvpermi.q $xr8, $xr10, 2 - vreplvei.w $vr9, $vr7, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr7, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr7, 1 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr9, 16 - xvpermi.q $xr7, $xr10, 2 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr8, $xr11, 2 + xvfadd.d $xr7, $xr9, $xr7 xvfadd.d $xr6, $xr8, $xr6 - xvfadd.d $xr5, $xr7, $xr5 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $a5, 0 + xvfcvt.s.d $xr6, $xr7, $xr6 + xvperm.w $xr6, $xr6, $xr3 + xvst $xr6, $a5, 0 xvaddi.du $xr2, $xr2, 8 xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 @@ -4052,8 +3842,8 @@ partQ__align: # @partQ__align move $a0, $a3 bstrins.d $a0, $a5, 1, 0 xvreplve0.d $xr2, $xr0 - pcalau12i $a5, %pc_hi20(.LCPI4_4) - vld $vr3, $a5, %pc_lo12(.LCPI4_4) + pcalau12i $a5, %pc_hi20(.LCPI4_5) + vld $vr3, $a5, %pc_lo12(.LCPI4_5) xvreplve0.d $xr4, $xr1 addi.d $a5, $a1, 4 xvldi $xr5, -800 @@ -4078,18 +3868,9 @@ partQ__align: # @partQ__align vextrins.d $vr7, $vr8, 16 xvpermi.q $xr7, $xr9, 2 xvfmadd.d $xr6, $xr4, $xr6, $xr7 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - vst $vr8, $a5, 0 + xvpermi.q $xr7, $xr6, 1 + vfcvt.s.d $vr6, $vr7, $vr6 + vst $vr6, $a5, 0 vaddi.wu $vr3, $vr3, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 16 @@ -4177,7 +3958,7 @@ partQ__align: # @partQ__align ori $a4, $zero, 1 move $a5, $s0 ld.d $s0, $sp, 408 # 8-byte Folded Reload - ld.d $ra, $sp, 24 # 8-byte Folded Reload + ld.d $ra, $sp, 16 # 8-byte Folded Reload beq $a5, $a4, .LBB4_241 # %bb.213: fld.s $fa0, $fp, 0 @@ -4245,8 +4026,8 @@ partQ__align: # @partQ__align move $a5, $zero b .LBB4_229 .LBB4_226: # %vector.ph1383 - pcalau12i $a6, %pc_hi20(.LCPI4_5) - vld $vr0, $a6, %pc_lo12(.LCPI4_5) + pcalau12i $a6, %pc_hi20(.LCPI4_6) + vld $vr0, $a6, %pc_lo12(.LCPI4_6) bstrpick.d $a5, $a5, 31, 2 slli.d $a5, $a5, 2 addi.d $a6, $s7, 16 @@ -4341,8 +4122,8 @@ partQ__align: # @partQ__align move $a4, $zero b .LBB4_247 .LBB4_244: # %vector.ph1334 - pcalau12i $a5, %pc_hi20(.LCPI4_5) - vld $vr0, $a5, %pc_lo12(.LCPI4_5) + pcalau12i $a5, %pc_hi20(.LCPI4_6) + vld $vr0, $a5, %pc_lo12(.LCPI4_6) bstrpick.d $a4, $a4, 31, 2 slli.d $a4, $a4, 2 addi.d $a5, $s7, 16 @@ -4403,8 +4184,8 @@ partQ__align: # @partQ__align andi $a7, $a6, 12 bstrpick.d $a5, $a6, 31, 4 slli.d $a5, $a5, 4 - pcalau12i $t0, %pc_hi20(.LCPI4_6) - xvld $xr0, $t0, %pc_lo12(.LCPI4_6) + pcalau12i $t0, %pc_hi20(.LCPI4_7) + xvld $xr0, $t0, %pc_lo12(.LCPI4_7) addi.d $t0, $a3, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -4426,8 +4207,8 @@ partQ__align: # @partQ__align beqz $a7, .LBB4_261 .LBB4_258: # %vec.epilog.ph1418 move $a7, $a5 - pcalau12i $a5, %pc_hi20(.LCPI4_7) - vld $vr0, $a5, %pc_lo12(.LCPI4_7) + pcalau12i $a5, %pc_hi20(.LCPI4_8) + vld $vr0, $a5, %pc_lo12(.LCPI4_8) bstrpick.d $a5, $a6, 31, 2 slli.d $a5, $a5, 2 vreplgr2vr.w $vr1, $a7 @@ -4739,8 +4520,8 @@ partQ__align: # @partQ__align andi $a6, $a5, 12 bstrpick.d $a4, $a5, 31, 4 slli.d $a4, $a4, 4 - pcalau12i $a7, %pc_hi20(.LCPI4_6) - xvld $xr0, $a7, %pc_lo12(.LCPI4_6) + pcalau12i $a7, %pc_hi20(.LCPI4_7) + xvld $xr0, $a7, %pc_lo12(.LCPI4_7) addi.d $a7, $a2, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 @@ -4762,8 +4543,8 @@ partQ__align: # @partQ__align beqz $a6, .LBB4_310 .LBB4_307: # %vec.epilog.ph1368 move $a6, $a4 - pcalau12i $a4, %pc_hi20(.LCPI4_7) - vld $vr0, $a4, %pc_lo12(.LCPI4_7) + pcalau12i $a4, %pc_hi20(.LCPI4_8) + vld $vr0, $a4, %pc_lo12(.LCPI4_8) bstrpick.d $a4, $a5, 31, 2 slli.d $a4, $a4, 2 vreplgr2vr.w $vr1, $a6 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s index 2fd495c..98a20bc 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s @@ -970,6 +970,15 @@ part_imp_match_init: # @part_imp_match_init .section .rodata.cst32,"aM",@progbits,32 .p2align 5, 0x0 .LCPI4_1: + .word 0 # 0x0 + .word 1 # 0x1 + .word 4 # 0x4 + .word 5 # 0x5 + .word 2 # 0x2 + .word 3 # 0x3 + .word 6 # 0x6 + .word 7 # 0x7 +.LCPI4_2: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 @@ -978,17 +987,17 @@ part_imp_match_init: # @part_imp_match_init .word 6 # 0x6 .word 7 # 0x7 .word 8 # 0x8 -.LCPI4_2: +.LCPI4_3: .dword 5 # 0x5 .dword 6 # 0x6 .dword 7 # 0x7 .dword 8 # 0x8 -.LCPI4_3: +.LCPI4_4: .dword 1 # 0x1 .dword 2 # 0x2 .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI4_6: +.LCPI4_7: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -999,15 +1008,15 @@ part_imp_match_init: # @part_imp_match_init .word 7 # 0x7 .section .rodata.cst16,"aM",@progbits,16 .p2align 4, 0x0 -.LCPI4_4: +.LCPI4_5: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI4_5: +.LCPI4_6: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI4_7: +.LCPI4_8: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -1081,20 +1090,21 @@ partA__align: # @partA__align st.d $a1, $sp, 176 # 8-byte Folded Spill ld.w $s7, $a1, %pc_lo12(partA__align.orlgth2) st.d $a0, $sp, 88 # 8-byte Folded Spill - addi.w $a1, $a0, 0 + addi.w $s8, $a0, 0 pcalau12i $a0, %pc_hi20(partA__align.w1) - st.d $a0, $sp, 216 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(partA__align.w2) st.d $a0, $sp, 256 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(partA__align.initverticalw) + pcalau12i $a0, %pc_hi20(partA__align.w2) st.d $a0, $sp, 248 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(partA__align.initverticalw) + st.d $a0, $sp, 232 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partA__align.lastverticalw) st.d $a0, $sp, 240 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partA__align.m) st.d $a0, $sp, 136 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partA__align.mp) st.d $a0, $sp, 128 # 8-byte Folded Spill - pcalau12i $s8, %pc_hi20(partA__align.mseq) + pcalau12i $a0, %pc_hi20(partA__align.mseq) + st.d $a0, $sp, 272 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partA__align.ogcp1) st.d $a0, $sp, 208 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partA__align.ogcp2) @@ -1108,36 +1118,39 @@ partA__align: # @partA__align pcalau12i $a0, %pc_hi20(partA__align.cpmx2) st.d $a0, $sp, 264 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partA__align.floatwork) - st.d $a0, $sp, 232 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(partA__align.intwork) st.d $a0, $sp, 224 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(partA__align.intwork) + st.d $a0, $sp, 216 # 8-byte Folded Spill st.d $s6, $sp, 96 # 8-byte Folded Spill st.d $s3, $sp, 32 # 8-byte Folded Spill - st.d $a1, $sp, 272 # 8-byte Folded Spill st.d $s5, $sp, 168 # 8-byte Folded Spill st.d $s0, $sp, 56 # 8-byte Folded Spill - blt $s4, $s3, .LBB4_4 + blt $s4, $s3, .LBB4_5 # %bb.3: - bge $s7, $a1, .LBB4_9 -.LBB4_4: + blt $s7, $s8, .LBB4_5 +# %bb.4: + ld.d $s0, $sp, 272 # 8-byte Folded Reload + bgtz $s1, .LBB4_10 + b .LBB4_17 +.LBB4_5: pcalau12i $s6, %pc_hi20(partA__align.match) - blez $s4, .LBB4_7 -# %bb.5: - ld.d $s3, $sp, 88 # 8-byte Folded Reload - blez $s7, .LBB4_8 + blez $s4, .LBB4_8 # %bb.6: - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $s3, $sp, 88 # 8-byte Folded Reload + blez $s7, .LBB4_9 +# %bb.7: + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.w1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.w2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $s6, %pc_lo12(partA__align.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -1153,7 +1166,8 @@ partA__align: # @partA__align ld.d $a0, $a0, %pc_lo12(partA__align.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $s8, %pc_lo12(partA__align.mseq) + ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(partA__align.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 ld.d $a0, $sp, 208 # 8-byte Folded Reload @@ -1180,21 +1194,21 @@ partA__align: # @partA__align ld.d $a0, $a0, %pc_lo12(partA__align.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 ld.w $s4, $s5, %pc_lo12(partA__align.orlgth1) ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.w $s7, $a0, %pc_lo12(partA__align.orlgth2) - b .LBB4_8 -.LBB4_7: - ld.d $s3, $sp, 88 # 8-byte Folded Reload + b .LBB4_9 .LBB4_8: + ld.d $s3, $sp, 88 # 8-byte Folded Reload +.LBB4_9: pcalau12i $a0, %pc_hi20(.LCPI4_0) fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) movgr2fr.w $fa1, $s0 @@ -1207,6 +1221,7 @@ partA__align: # @partA__align maskeqz $a1, $s4, $a1 or $s4, $a1, $a0 addi.w $s0, $s4, 100 + st.d $s0, $sp, 120 # 8-byte Folded Spill movgr2fr.w $fa1, $s3 ffint.d.w $fa1, $fa1 fmul.d $fa0, $fa1, $fa0 @@ -1221,12 +1236,12 @@ partA__align: # @partA__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 216 # 8-byte Folded Reload + ld.d $a1, $sp, 256 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partA__align.w1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 256 # 8-byte Folded Reload + ld.d $a1, $sp, 248 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partA__align.w2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -1236,7 +1251,7 @@ partA__align: # @partA__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partA__align.initverticalw) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -1259,7 +1274,8 @@ partA__align: # @partA__align add.w $a1, $s3, $s0 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - st.d $a0, $s8, %pc_lo12(partA__align.mseq) + ld.d $s0, $sp, 272 # 8-byte Folded Reload + st.d $a0, $s0, %pc_lo12(partA__align.mseq) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -1292,32 +1308,32 @@ partA__align: # @partA__align jirl $ra, $ra, 0 ld.d $a1, $sp, 264 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partA__align.cpmx2) - slt $a0, $s3, $s0 + ld.d $a2, $sp, 120 # 8-byte Folded Reload + slt $a0, $s3, $a2 masknez $a1, $s3, $a0 - maskeqz $a0, $s0, $a0 + maskeqz $a0, $a2, $a0 or $a0, $a0, $a1 addi.w $s5, $a0, 2 ori $a1, $zero, 26 move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 232 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partA__align.floatwork) ori $a1, $zero, 26 move $a0, $s5 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 216 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partA__align.intwork) ld.d $a0, $sp, 168 # 8-byte Folded Reload st.w $s4, $a0, %pc_lo12(partA__align.orlgth1) ld.d $a0, $sp, 176 # 8-byte Folded Reload st.w $s7, $a0, %pc_lo12(partA__align.orlgth2) ld.d $s6, $sp, 96 # 8-byte Folded Reload -.LBB4_9: blez $s1, .LBB4_17 -# %bb.10: # %.lr.ph - ld.d $a0, $s8, %pc_lo12(partA__align.mseq) +.LBB4_10: # %.lr.ph + ld.d $a0, $s0, %pc_lo12(partA__align.mseq) ld.d $a1, $sp, 72 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partA__align.mseq1) ori $a3, $zero, 8 @@ -1362,7 +1378,7 @@ partA__align: # @partA__align .LBB4_17: # %.preheader459 blez $fp, .LBB4_23 # %bb.18: # %.lr.ph463 - ld.d $a0, $s8, %pc_lo12(partA__align.mseq) + ld.d $a0, $s0, %pc_lo12(partA__align.mseq) ld.d $a1, $sp, 80 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partA__align.mseq2) ori $a2, $zero, 8 @@ -1371,7 +1387,7 @@ partA__align: # @partA__align alsl.d $a2, $s1, $a0, 3 sub.d $a2, $a1, $a2 ori $a3, $zero, 64 - bgeu $a2, $a3, .LBB4_165 + bgeu $a2, $a3, .LBB4_164 .LBB4_20: move $a2, $zero .LBB4_21: # %scalar.ph643.preheader @@ -1452,18 +1468,17 @@ partA__align: # @partA__align move $a0, $s2 ld.d $s3, $sp, 192 # 8-byte Folded Reload move $a2, $s3 - ld.d $s4, $sp, 32 # 8-byte Folded Reload - move $a3, $s4 + ld.d $s0, $sp, 32 # 8-byte Folded Reload + move $a3, $s0 move $a4, $s1 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(partA__align.cpmx2) move $a0, $s6 - ld.d $s8, $sp, 200 # 8-byte Folded Reload - move $a2, $s8 - ld.d $s0, $sp, 272 # 8-byte Folded Reload - move $a3, $s0 + ld.d $s4, $sp, 200 # 8-byte Folded Reload + move $a2, $s4 + move $a3, $s8 move $a4, $fp pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 @@ -1478,15 +1493,15 @@ partA__align: # @partA__align move $a1, $s1 move $a2, $s2 move $a3, $s3 - move $a4, $s4 + move $a4, $s0 pcaddu18i $ra, %call36(new_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.ogcp2) move $a1, $fp ld.d $a2, $sp, 96 # 8-byte Folded Reload - move $a3, $s8 - move $a4, $s0 + move $a3, $s4 + move $a4, $s8 move $a5, $s5 pcaddu18i $ra, %call36(new_OpeningGapCount) jirl $ra, $ra, 0 @@ -1495,7 +1510,7 @@ partA__align: # @partA__align move $a1, $s1 move $a2, $s2 move $a3, $s3 - move $a4, $s4 + move $a4, $s0 move $a5, $s7 pcaddu18i $ra, %call36(new_FinalGapCount) jirl $ra, $ra, 0 @@ -1503,26 +1518,30 @@ partA__align: # @partA__align ld.d $a0, $a0, %pc_lo12(partA__align.fgcp2) move $a1, $fp ld.d $a2, $sp, 96 # 8-byte Folded Reload - move $a3, $s8 - move $a4, $s0 + move $a3, $s4 + move $a4, $s8 move $a5, $s6 ld.d $s6, $sp, 96 # 8-byte Folded Reload pcaddu18i $ra, %call36(new_FinalGapCount) jirl $ra, $ra, 0 - b .LBB4_33 + ld.d $a7, $sp, 56 # 8-byte Folded Reload + ld.d $t0, $sp, 88 # 8-byte Folded Reload + fcvt.d.s $fa0, $fs0 + bgtz $s0, .LBB4_33 + b .LBB4_39 .LBB4_32: move $a1, $s1 move $a2, $s2 move $a3, $s3 - move $a4, $s4 + move $a4, $s0 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.ogcp2) move $a1, $fp move $a2, $s6 - move $a3, $s8 - move $a4, $s0 + move $a3, $s4 + move $a4, $s8 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 160 # 8-byte Folded Reload @@ -1530,47 +1549,45 @@ partA__align: # @partA__align move $a1, $s1 move $a2, $s2 move $a3, $s3 - move $a4, $s4 + move $a4, $s0 pcaddu18i $ra, %call36(st_FinalGapCount) jirl $ra, $ra, 0 ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.fgcp2) move $a1, $fp move $a2, $s6 - move $a3, $s8 - move $a4, $s0 + move $a3, $s4 + move $a4, $s8 pcaddu18i $ra, %call36(st_FinalGapCount) jirl $ra, $ra, 0 -.LBB4_33: ld.d $a7, $sp, 56 # 8-byte Folded Reload ld.d $t0, $sp, 88 # 8-byte Folded Reload fcvt.d.s $fa0, $fs0 - ld.d $t1, $sp, 272 # 8-byte Folded Reload - blez $s4, .LBB4_40 -# %bb.34: # %.lr.ph466 + blez $s0, .LBB4_39 +.LBB4_33: # %.lr.ph466 ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.ogcp1) ld.d $a1, $sp, 160 # 8-byte Folded Reload ld.d $a2, $a1, %pc_lo12(partA__align.fgcp1) bstrpick.d $a1, $a7, 30, 0 ori $a3, $zero, 8 - bltu $a1, $a3, .LBB4_37 -# %bb.35: # %vector.memcheck656 + bltu $a1, $a3, .LBB4_36 +# %bb.34: # %vector.memcheck656 alsl.d $a3, $a1, $a2, 2 - bgeu $a0, $a3, .LBB4_159 -# %bb.36: # %vector.memcheck656 + bgeu $a0, $a3, .LBB4_158 +# %bb.35: # %vector.memcheck656 alsl.d $a3, $a1, $a0, 2 - bgeu $a2, $a3, .LBB4_159 -.LBB4_37: + bgeu $a2, $a3, .LBB4_158 +.LBB4_36: move $a3, $zero -.LBB4_38: # %scalar.ph658.preheader +.LBB4_37: # %scalar.ph658.preheader alsl.d $a2, $a3, $a2, 2 alsl.d $a0, $a3, $a0, 2 sub.d $a1, $a1, $a3 vldi $vr1, -912 vldi $vr2, -928 .p2align 4, , 16 -.LBB4_39: # %scalar.ph658 +.LBB4_38: # %scalar.ph658 # =>This Inner Loop Header: Depth=1 fld.s $fa3, $a0, 0 fcvt.d.s $fa3, $fa3 @@ -1589,33 +1606,33 @@ partA__align: # @partA__align addi.d $a2, $a2, 4 addi.d $a1, $a1, -1 addi.d $a0, $a0, 4 - bnez $a1, .LBB4_39 -.LBB4_40: # %.preheader458 - blez $t1, .LBB4_47 -# %bb.41: # %.lr.ph468 + bnez $a1, .LBB4_38 +.LBB4_39: # %.preheader458 + blez $s8, .LBB4_46 +# %bb.40: # %.lr.ph468 ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.ogcp2) ld.d $a1, $sp, 144 # 8-byte Folded Reload ld.d $a2, $a1, %pc_lo12(partA__align.fgcp2) bstrpick.d $a1, $t0, 30, 0 ori $a3, $zero, 8 - bltu $a1, $a3, .LBB4_44 -# %bb.42: # %vector.memcheck671 + bltu $a1, $a3, .LBB4_43 +# %bb.41: # %vector.memcheck671 alsl.d $a3, $a1, $a2, 2 - bgeu $a0, $a3, .LBB4_162 -# %bb.43: # %vector.memcheck671 + bgeu $a0, $a3, .LBB4_161 +# %bb.42: # %vector.memcheck671 alsl.d $a3, $a1, $a0, 2 - bgeu $a2, $a3, .LBB4_162 -.LBB4_44: + bgeu $a2, $a3, .LBB4_161 +.LBB4_43: move $a3, $zero -.LBB4_45: # %scalar.ph677.preheader +.LBB4_44: # %scalar.ph677.preheader alsl.d $a2, $a3, $a2, 2 alsl.d $a0, $a3, $a0, 2 sub.d $a1, $a1, $a3 vldi $vr1, -912 vldi $vr2, -928 .p2align 4, , 16 -.LBB4_46: # %scalar.ph677 +.LBB4_45: # %scalar.ph677 # =>This Inner Loop Header: Depth=1 fld.s $fa3, $a0, 0 fcvt.d.s $fa3, $fa3 @@ -1634,89 +1651,93 @@ partA__align: # @partA__align addi.d $a2, $a2, 4 addi.d $a1, $a1, -1 addi.d $a0, $a0, 4 - bnez $a1, .LBB4_46 -.LBB4_47: # %._crit_edge469 + bnez $a1, .LBB4_45 +.LBB4_46: # %._crit_edge469 ld.d $a0, $sp, 432 st.d $a0, $sp, 200 # 8-byte Folded Spill ld.d $a0, $sp, 424 st.d $a0, $sp, 192 # 8-byte Folded Spill - ld.d $s5, $sp, 408 + ld.d $a0, $sp, 408 + st.d $a0, $sp, 272 # 8-byte Folded Spill ld.d $a0, $sp, 392 st.d $a0, $sp, 184 # 8-byte Folded Spill - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.w1) st.d $a0, $sp, 48 # 8-byte Folded Spill - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.w2) st.d $a0, $sp, 16 # 8-byte Folded Spill - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.initverticalw) ld.d $a1, $sp, 264 # 8-byte Folded Reload - ld.d $s0, $a1, %pc_lo12(partA__align.cpmx2) + ld.d $s4, $a1, %pc_lo12(partA__align.cpmx2) ld.d $a1, $sp, 280 # 8-byte Folded Reload - ld.d $s8, $a1, %pc_lo12(partA__align.cpmx1) - ld.d $a1, $sp, 232 # 8-byte Folded Reload - ld.d $s3, $a1, %pc_lo12(partA__align.floatwork) + ld.d $s5, $a1, %pc_lo12(partA__align.cpmx1) ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $s3, $a1, %pc_lo12(partA__align.floatwork) + ld.d $a1, $sp, 216 # 8-byte Folded Reload ld.d $s7, $a1, %pc_lo12(partA__align.intwork) ori $a7, $zero, 1 st.d $a0, $sp, 280 # 8-byte Folded Spill - move $a1, $s0 - move $a2, $s8 + move $a1, $s4 + move $a2, $s5 move $a3, $zero - move $a4, $s4 + move $a4, $s0 move $a5, $s3 move $a6, $s7 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 ld.d $a0, $sp, 64 # 8-byte Folded Reload - st.d $s8, $sp, 256 # 8-byte Folded Spill - st.d $s0, $sp, 264 # 8-byte Folded Spill - beqz $a0, .LBB4_54 -# %bb.48: - move $s8, $s3 - ld.d $s0, $sp, 56 # 8-byte Folded Reload - ld.d $a4, $sp, 272 # 8-byte Folded Reload + st.d $s5, $sp, 256 # 8-byte Folded Spill + st.d $s4, $sp, 264 # 8-byte Folded Spill + beqz $a0, .LBB4_53 +# %bb.47: + move $s5, $s3 + ld.d $t0, $sp, 56 # 8-byte Folded Reload ld.d $s3, $sp, 184 # 8-byte Folded Reload - blez $s4, .LBB4_51 -# %bb.49: # %.lr.ph.i + blez $s0, .LBB4_50 +# %bb.48: # %.lr.ph.i ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.w $a2, $a0, 0 pcalau12i $a0, %pc_hi20(impmtx) ld.d $a0, $a0, %pc_lo12(impmtx) - bstrpick.d $a1, $s0, 30, 0 - add.w $a2, $a2, $s5 + bstrpick.d $a1, $t0, 30, 0 + ld.d $a3, $sp, 272 # 8-byte Folded Reload + add.w $a2, $a2, $a3 slli.d $a2, $a2, 2 ld.d $a3, $sp, 192 # 8-byte Folded Reload - ld.d $a6, $sp, 280 # 8-byte Folded Reload + ld.d $a4, $sp, 280 # 8-byte Folded Reload .p2align 4, , 16 -.LBB4_50: # =>This Inner Loop Header: Depth=1 +.LBB4_49: # =>This Inner Loop Header: Depth=1 ld.w $a5, $a3, 0 add.w $a5, $a5, $s3 slli.d $a5, $a5, 3 ldx.d $a5, $a0, $a5 fldx.s $fa0, $a5, $a2 - fld.s $fa1, $a6, 0 + fld.s $fa1, $a4, 0 fadd.s $fa0, $fa0, $fa1 - fst.s $fa0, $a6, 0 + fst.s $fa0, $a4, 0 addi.d $a1, $a1, -1 - addi.d $a6, $a6, 4 + addi.d $a4, $a4, 4 addi.d $a3, $a3, 4 - bnez $a1, .LBB4_50 -.LBB4_51: # %part_imp_match_out_vead_tate_gapmap.exit + bnez $a1, .LBB4_49 +.LBB4_50: # %part_imp_match_out_vead_tate_gapmap.exit ori $a7, $zero, 1 ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.d $a1, $sp, 256 # 8-byte Folded Reload ld.d $a2, $sp, 264 # 8-byte Folded Reload move $a3, $zero - move $a5, $s8 + move $a4, $s8 + move $a5, $s5 move $a6, $s7 + move $s4, $t0 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - ld.d $t6, $sp, 272 # 8-byte Folded Reload + move $t7, $s4 ld.d $t4, $sp, 88 # 8-byte Folded Reload - blez $t6, .LBB4_55 -# %bb.52: # %.lr.ph.i337 + ld.d $a5, $sp, 272 # 8-byte Folded Reload + blez $s8, .LBB4_54 +# %bb.51: # %.lr.ph.i337 ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.w $a0, $a0, 0 pcalau12i $a1, %pc_hi20(impmtx) @@ -1728,9 +1749,9 @@ partA__align: # @partA__align ld.d $a2, $sp, 200 # 8-byte Folded Reload ld.d $a3, $sp, 48 # 8-byte Folded Reload .p2align 4, , 16 -.LBB4_53: # =>This Inner Loop Header: Depth=1 +.LBB4_52: # =>This Inner Loop Header: Depth=1 ld.w $a4, $a2, 0 - add.w $a4, $a4, $s5 + add.w $a4, $a4, $a5 slli.d $a4, $a4, 2 fldx.s $fa0, $a0, $a4 fld.s $fa1, $a3, 0 @@ -1739,52 +1760,50 @@ partA__align: # @partA__align addi.d $a1, $a1, -1 addi.d $a3, $a3, 4 addi.d $a2, $a2, 4 - bnez $a1, .LBB4_53 - b .LBB4_55 -.LBB4_54: # %.critedge + bnez $a1, .LBB4_52 + b .LBB4_54 +.LBB4_53: # %.critedge ori $a7, $zero, 1 ld.d $a0, $sp, 48 # 8-byte Folded Reload - move $a1, $s8 - move $a2, $s0 + move $a1, $s5 + move $a2, $s4 move $a3, $zero - ld.d $s0, $sp, 272 # 8-byte Folded Reload - move $a4, $s0 + move $a4, $s8 move $a5, $s3 move $a6, $s7 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - move $t6, $s0 - ld.d $s0, $sp, 56 # 8-byte Folded Reload + ld.d $t7, $sp, 56 # 8-byte Folded Reload ld.d $t4, $sp, 88 # 8-byte Folded Reload - move $s8, $s3 -.LBB4_55: # %part_imp_match_out_vead_gapmap.exit + move $s5, $s3 +.LBB4_54: # %part_imp_match_out_vead_gapmap.exit pcalau12i $a0, %got_pc_hi20(outgap) - ld.d $t7, $a0, %got_pc_lo12(outgap) - ld.w $t0, $t7, 0 + ld.d $t6, $a0, %got_pc_lo12(outgap) + ld.w $t0, $t6, 0 ori $a0, $zero, 1 ld.d $t5, $sp, 16 # 8-byte Folded Reload - bne $t0, $a0, .LBB4_64 -# %bb.56: # %.preheader454 - blez $s4, .LBB4_60 -# %bb.57: # %iter.check + bne $t0, $a0, .LBB4_63 +# %bb.55: # %.preheader454 + blez $s0, .LBB4_59 +# %bb.56: # %iter.check ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.ogcp1) ld.d $a1, $sp, 160 # 8-byte Folded Reload ld.d $a2, $a1, %pc_lo12(partA__align.fgcp1) - addi.d $a1, $s0, 1 + addi.d $a1, $t7, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a3, $a1, -1 ori $a5, $zero, 4 ori $a4, $zero, 1 - bgeu $a3, $a5, .LBB4_168 -.LBB4_58: # %vec.epilog.scalar.ph.preheader + bgeu $a3, $a5, .LBB4_167 +.LBB4_57: # %vec.epilog.scalar.ph.preheader alsl.d $a2, $a4, $a2, 2 addi.d $a2, $a2, -4 ld.d $a3, $sp, 280 # 8-byte Folded Reload alsl.d $a3, $a4, $a3, 2 sub.d $a1, $a1, $a4 .p2align 4, , 16 -.LBB4_59: # %vec.epilog.scalar.ph +.LBB4_58: # %vec.epilog.scalar.ph # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a0, 0 fld.s $fa1, $a2, 0 @@ -1795,11 +1814,11 @@ partA__align: # @partA__align addi.d $a2, $a2, 4 addi.d $a1, $a1, -1 addi.d $a3, $a3, 4 - bnez $a1, .LBB4_59 -.LBB4_60: # %.preheader452 + bnez $a1, .LBB4_58 +.LBB4_59: # %.preheader452 ld.d $t3, $sp, 48 # 8-byte Folded Reload - blez $t6, .LBB4_87 -# %bb.61: # %iter.check778 + blez $s8, .LBB4_86 +# %bb.60: # %iter.check778 ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.ogcp2) ld.d $a1, $sp, 144 # 8-byte Folded Reload @@ -1809,14 +1828,14 @@ partA__align: # @partA__align addi.d $a3, $a1, -1 ori $a5, $zero, 4 ori $a4, $zero, 1 - bgeu $a3, $a5, .LBB4_172 -.LBB4_62: # %vec.epilog.scalar.ph796.preheader + bgeu $a3, $a5, .LBB4_171 +.LBB4_61: # %vec.epilog.scalar.ph796.preheader alsl.d $a2, $a4, $a2, 2 addi.d $a2, $a2, -4 alsl.d $a3, $a4, $t3, 2 sub.d $a1, $a1, $a4 .p2align 4, , 16 -.LBB4_63: # %vec.epilog.scalar.ph796 +.LBB4_62: # %vec.epilog.scalar.ph796 # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a0, 0 fld.s $fa1, $a2, 0 @@ -1827,12 +1846,12 @@ partA__align: # @partA__align addi.d $a2, $a2, 4 addi.d $a1, $a1, -1 addi.d $a3, $a3, 4 - bnez $a1, .LBB4_63 - b .LBB4_79 -.LBB4_64: # %.preheader457 + bnez $a1, .LBB4_62 + b .LBB4_78 +.LBB4_63: # %.preheader457 ld.d $t3, $sp, 48 # 8-byte Folded Reload - blez $t6, .LBB4_71 -# %bb.65: # %.lr.ph472 + blez $s8, .LBB4_70 +# %bb.64: # %.lr.ph472 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 @@ -1841,93 +1860,74 @@ partA__align: # @partA__align addi.d $a2, $a1, -1 ori $a4, $zero, 8 ori $a3, $zero, 1 - bltu $a2, $a4, .LBB4_69 -# %bb.66: # %vector.ph694 + bltu $a2, $a4, .LBB4_68 +# %bb.65: # %vector.ph694 move $a4, $a2 bstrins.d $a4, $zero, 2, 0 ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 + pcalau12i $a5, %pc_hi20(.LCPI4_2) + xvld $xr0, $a5, %pc_lo12(.LCPI4_2) pcalau12i $a5, %pc_hi20(.LCPI4_1) - xvld $xr0, $a5, %pc_lo12(.LCPI4_1) - xvreplgr2vr.w $xr1, $a0 + xvld $xr1, $a5, %pc_lo12(.LCPI4_1) + xvreplgr2vr.w $xr2, $a0 addi.d $a5, $t3, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 -.LBB4_67: # %vector.body699 +.LBB4_66: # %vector.body699 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 - bnez $a6, .LBB4_67 -# %bb.68: # %middle.block703 - beq $a2, $a4, .LBB4_71 -.LBB4_69: # %scalar.ph692.preheader + bnez $a6, .LBB4_66 +# %bb.67: # %middle.block703 + beq $a2, $a4, .LBB4_70 +.LBB4_68: # %scalar.ph692.preheader mul.d $a2, $a0, $a3 alsl.d $a4, $a3, $t3, 2 sub.d $a1, $a1, $a3 vldi $vr0, -800 .p2align 4, , 16 -.LBB4_70: # %scalar.ph692 +.LBB4_69: # %scalar.ph692 # =>This Inner Loop Header: Depth=1 fld.s $fa1, $a4, 0 movgr2fr.w $fa2, $a2 @@ -1940,107 +1940,88 @@ partA__align: # @partA__align add.w $a2, $a2, $a0 addi.d $a1, $a1, -1 addi.d $a4, $a4, 4 - bnez $a1, .LBB4_70 -.LBB4_71: # %.preheader455 - blez $s4, .LBB4_78 -# %bb.72: # %.lr.ph475 + bnez $a1, .LBB4_69 +.LBB4_70: # %.preheader455 + blez $s0, .LBB4_77 +# %bb.71: # %.lr.ph475 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - addi.d $a1, $s0, 1 + addi.d $a1, $t7, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a4, $zero, 8 ori $a3, $zero, 1 - bltu $a2, $a4, .LBB4_76 -# %bb.73: # %vector.ph708 + bltu $a2, $a4, .LBB4_75 +# %bb.72: # %vector.ph708 move $a4, $a2 bstrins.d $a4, $zero, 2, 0 ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 + pcalau12i $a5, %pc_hi20(.LCPI4_2) + xvld $xr0, $a5, %pc_lo12(.LCPI4_2) pcalau12i $a5, %pc_hi20(.LCPI4_1) - xvld $xr0, $a5, %pc_lo12(.LCPI4_1) - xvreplgr2vr.w $xr1, $a0 + xvld $xr1, $a5, %pc_lo12(.LCPI4_1) + xvreplgr2vr.w $xr2, $a0 ld.d $a5, $sp, 280 # 8-byte Folded Reload addi.d $a5, $a5, 4 - xvldi $xr2, -800 + xvldi $xr3, -800 move $a6, $a4 .p2align 4, , 16 -.LBB4_74: # %vector.body713 +.LBB4_73: # %vector.body713 # =>This Inner Loop Header: Depth=1 - xvmul.w $xr3, $xr1, $xr0 - xvpermi.q $xr4, $xr3, 1 + xvmul.w $xr4, $xr2, $xr0 + xvpermi.q $xr5, $xr4, 1 + vext2xv.d.w $xr5, $xr5 + xvffint.d.l $xr5, $xr5 vext2xv.d.w $xr4, $xr4 + xvld $xr6, $a5, 0 xvffint.d.l $xr4, $xr4 - vext2xv.d.w $xr3, $xr3 - xvld $xr5, $a5, 0 - xvffint.d.l $xr3, $xr3 - xvfmul.d $xr3, $xr3, $xr2 - xvfmul.d $xr4, $xr4, $xr2 - xvpermi.q $xr6, $xr5, 1 - vreplvei.w $vr7, $vr6, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr6, 2 + xvfmul.d $xr4, $xr4, $xr3 + xvfmul.d $xr5, $xr5, $xr3 + xvpermi.q $xr7, $xr6, 1 + vreplvei.w $vr8, $vr7, 3 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr6, 1 + vreplvei.w $vr9, $vr7, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr7, 1 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr7, $vr7, 0 fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr8, 16 + xvpermi.q $xr7, $xr9, 2 + vreplvei.w $vr8, $vr6, 3 + fcvt.d.s $ft0, $ft0 + vreplvei.w $vr9, $vr6, 2 + fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr8, 16 + vreplvei.w $vr8, $vr6, 1 + fcvt.d.s $ft0, $ft0 vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vextrins.d $vr6, $vr7, 16 - xvpermi.q $xr6, $xr8, 2 - vreplvei.w $vr7, $vr5, 3 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr8, $vr5, 2 - fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr7, 16 - vreplvei.w $vr7, $vr5, 1 - fcvt.d.s $fa7, $fa7 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr7, 16 - xvpermi.q $xr5, $xr8, 2 + vextrins.d $vr6, $vr8, 16 + xvpermi.q $xr6, $xr9, 2 + xvfadd.d $xr5, $xr7, $xr5 xvfadd.d $xr4, $xr6, $xr4 - xvfadd.d $xr3, $xr5, $xr3 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpickve.d $xr4, $xr3, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr5, $xr3, 0 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr5, $vr4, 16 - xvpickve.d $xr4, $xr3, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr5, $vr4, 32 - xvpickve.d $xr3, $xr3, 3 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr5, $vr3, 48 - xvpermi.q $xr5, $xr6, 2 - xvst $xr5, $a5, 0 + xvfcvt.s.d $xr4, $xr5, $xr4 + xvperm.w $xr4, $xr4, $xr1 + xvst $xr4, $a5, 0 xvaddi.wu $xr0, $xr0, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 - bnez $a6, .LBB4_74 -# %bb.75: # %middle.block720 - beq $a2, $a4, .LBB4_78 -.LBB4_76: # %scalar.ph706.preheader + bnez $a6, .LBB4_73 +# %bb.74: # %middle.block720 + beq $a2, $a4, .LBB4_77 +.LBB4_75: # %scalar.ph706.preheader mul.d $a2, $a0, $a3 ld.d $a4, $sp, 280 # 8-byte Folded Reload alsl.d $a4, $a3, $a4, 2 sub.d $a1, $a1, $a3 vldi $vr0, -800 .p2align 4, , 16 -.LBB4_77: # %scalar.ph706 +.LBB4_76: # %scalar.ph706 # =>This Inner Loop Header: Depth=1 fld.s $fa1, $a4, 0 movgr2fr.w $fa2, $a2 @@ -2053,10 +2034,10 @@ partA__align: # @partA__align add.w $a2, $a2, $a0 addi.d $a1, $a1, -1 addi.d $a4, $a4, 4 - bnez $a1, .LBB4_77 -.LBB4_78: # %.loopexit453 - blez $t6, .LBB4_88 -.LBB4_79: # %iter.check829 + bnez $a1, .LBB4_76 +.LBB4_77: # %.loopexit453 + blez $s8, .LBB4_87 +.LBB4_78: # %iter.check829 ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a6, $a0, %pc_lo12(partA__align.ogcp1) addi.d $a0, $a6, 4 @@ -2069,15 +2050,15 @@ partA__align: # @partA__align addi.d $a4, $a3, -1 ori $a7, $zero, 4 ori $a5, $zero, 1 - bgeu $a4, $a7, .LBB4_83 -.LBB4_80: # %vec.epilog.scalar.ph845.preheader + bgeu $a4, $a7, .LBB4_82 +.LBB4_79: # %vec.epilog.scalar.ph845.preheader alsl.d $a4, $a5, $t3, 2 addi.d $a4, $a4, -4 alsl.d $a1, $a5, $a1, 2 alsl.d $a2, $a5, $a2, 2 sub.d $a3, $a3, $a5 .p2align 4, , 16 -.LBB4_81: # %vec.epilog.scalar.ph845 +.LBB4_80: # %vec.epilog.scalar.ph845 # =>This Inner Loop Header: Depth=1 fld.s $fa0, $a4, 0 fld.s $fa1, $a0, 0 @@ -2088,12 +2069,12 @@ partA__align: # @partA__align addi.d $a1, $a1, 4 addi.d $a3, $a3, -1 addi.d $a2, $a2, 4 - bnez $a3, .LBB4_81 -.LBB4_82: # %._crit_edge486.loopexit - ld.w $t0, $t7, 0 + bnez $a3, .LBB4_80 +.LBB4_81: # %._crit_edge486.loopexit + ld.w $t0, $t6, 0 st.d $zero, $sp, 248 # 8-byte Folded Spill - b .LBB4_89 -.LBB4_83: # %vector.memcheck814 + b .LBB4_88 +.LBB4_82: # %vector.memcheck814 addi.d $a7, $a1, 4 alsl.d $t0, $a3, $a1, 2 alsl.d $t1, $a3, $t3, 2 @@ -2101,28 +2082,28 @@ partA__align: # @partA__align sltu $t1, $a7, $t1 sltu $t2, $t3, $t0 and $t1, $t1, $t2 - bnez $t1, .LBB4_80 -# %bb.84: # %vector.memcheck814 + bnez $t1, .LBB4_79 +# %bb.83: # %vector.memcheck814 addi.d $a6, $a6, 8 sltu $a6, $a7, $a6 sltu $a7, $a0, $t0 and $a6, $a6, $a7 - bnez $a6, .LBB4_80 -# %bb.85: # %vector.main.loop.iter.check831 + bnez $a6, .LBB4_79 +# %bb.84: # %vector.main.loop.iter.check831 ori $a5, $zero, 16 - bgeu $a4, $a5, .LBB4_234 -# %bb.86: + bgeu $a4, $a5, .LBB4_233 +# %bb.85: move $a6, $zero - b .LBB4_238 -.LBB4_87: + b .LBB4_237 +.LBB4_86: ori $a0, $zero, 1 st.d $a0, $sp, 248 # 8-byte Folded Spill ori $t0, $zero, 1 - b .LBB4_89 -.LBB4_88: + b .LBB4_88 +.LBB4_87: ori $a0, $zero, 1 st.d $a0, $sp, 248 # 8-byte Folded Spill -.LBB4_89: # %._crit_edge486 +.LBB4_88: # %._crit_edge486 slli.d $a0, $t4, 32 ori $a1, $zero, 0 lu32i.d $a1, -1 @@ -2132,21 +2113,21 @@ partA__align: # @partA__align ld.d $a1, $sp, 240 # 8-byte Folded Reload ld.d $s4, $a1, %pc_lo12(partA__align.lastverticalw) sltu $a1, $zero, $t0 - add.w $a3, $a1, $s0 + add.w $a3, $a1, $t7 ori $a1, $zero, 2 fst.s $fa0, $s4, 0 st.d $a3, $sp, 240 # 8-byte Folded Spill - blt $a3, $a1, .LBB4_104 -# %bb.90: # %.lr.ph510 - st.d $t7, $sp, 8 # 8-byte Folded Spill + blt $a3, $a1, .LBB4_103 +# %bb.89: # %.lr.ph510 + st.d $t6, $sp, 8 # 8-byte Folded Spill st.d $s7, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 232 # 8-byte Folded Spill + st.d $s5, $sp, 232 # 8-byte Folded Spill bstrpick.d $a1, $t4, 30, 0 st.d $a1, $sp, 176 # 8-byte Folded Spill srai.d $a0, $a0, 32 ld.d $a1, $sp, 64 # 8-byte Folded Reload sltu $a1, $zero, $a1 - slt $a2, $zero, $t6 + slt $a2, $zero, $s8 and $a1, $a1, $a2 st.d $a1, $sp, 216 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(impmtx) @@ -2181,21 +2162,21 @@ partA__align: # @partA__align ori $s7, $zero, 1 slli.d $a0, $a0, 2 st.d $a0, $sp, 208 # 8-byte Folded Spill - b .LBB4_92 + b .LBB4_91 .p2align 4, , 16 -.LBB4_91: # %._crit_edge502 - # in Loop: Header=BB4_92 Depth=1 +.LBB4_90: # %._crit_edge502 + # in Loop: Header=BB4_91 Depth=1 ld.d $a1, $sp, 208 # 8-byte Folded Reload fldx.s $fa0, $s3, $a1 addi.d $s7, $s7, 1 fstx.s $fa0, $s4, $a0 move $t3, $s3 ld.d $a0, $sp, 240 # 8-byte Folded Reload - beq $s7, $a0, .LBB4_105 -.LBB4_92: # =>This Loop Header: Depth=1 - # Child Loop BB4_94 Depth 2 - # Child Loop BB4_98 Depth 2 - move $s8, $s2 + beq $s7, $a0, .LBB4_104 +.LBB4_91: # =>This Loop Header: Depth=1 + # Child Loop BB4_93 Depth 2 + # Child Loop BB4_97 Depth 2 + move $s5, $s2 addi.d $s6, $s7, -1 slli.d $s0, $s6, 2 ld.d $a0, $sp, 280 # 8-byte Folded Reload @@ -2207,17 +2188,18 @@ partA__align: # @partA__align ld.d $a1, $sp, 256 # 8-byte Folded Reload ld.d $a2, $sp, 264 # 8-byte Folded Reload move $a3, $s7 - move $a4, $t6 + move $a4, $s8 ld.d $a5, $sp, 232 # 8-byte Folded Reload ld.d $a6, $sp, 224 # 8-byte Folded Reload move $a7, $zero pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 slli.d $a0, $s7, 2 + ld.d $a6, $sp, 272 # 8-byte Folded Reload ld.d $a1, $sp, 216 # 8-byte Folded Reload - beqz $a1, .LBB4_95 -# %bb.93: # %.lr.ph.i342 - # in Loop: Header=BB4_92 Depth=1 + beqz $a1, .LBB4_94 +# %bb.92: # %.lr.ph.i342 + # in Loop: Header=BB4_91 Depth=1 ld.d $a1, $sp, 192 # 8-byte Folded Reload ldx.w $a1, $a1, $a0 ld.d $a2, $sp, 184 # 8-byte Folded Reload @@ -2229,10 +2211,10 @@ partA__align: # @partA__align move $a3, $s3 ld.d $a4, $sp, 176 # 8-byte Folded Reload .p2align 4, , 16 -.LBB4_94: # Parent Loop BB4_92 Depth=1 +.LBB4_93: # Parent Loop BB4_91 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $a5, $a2, 0 - add.w $a5, $a5, $s5 + add.w $a5, $a5, $a6 slli.d $a5, $a5, 2 fldx.s $fa0, $a1, $a5 fld.s $fa1, $a3, 0 @@ -2241,19 +2223,18 @@ partA__align: # @partA__align addi.d $a4, $a4, -1 addi.d $a3, $a3, 4 addi.d $a2, $a2, 4 - bnez $a4, .LBB4_94 -.LBB4_95: # %part_imp_match_out_vead_gapmap.exit347 - # in Loop: Header=BB4_92 Depth=1 + bnez $a4, .LBB4_93 +.LBB4_94: # %part_imp_match_out_vead_gapmap.exit347 + # in Loop: Header=BB4_91 Depth=1 ld.d $a1, $sp, 280 # 8-byte Folded Reload fldx.s $fa0, $a1, $a0 fst.s $fa0, $s3, 0 move $t5, $s2 - move $s2, $s8 - ld.d $t6, $sp, 272 # 8-byte Folded Reload + move $s2, $s5 ld.d $a1, $sp, 248 # 8-byte Folded Reload - bnez $a1, .LBB4_91 -# %bb.96: # %.lr.ph501.preheader - # in Loop: Header=BB4_92 Depth=1 + bnez $a1, .LBB4_90 +# %bb.95: # %.lr.ph501.preheader + # in Loop: Header=BB4_91 Depth=1 move $a1, $zero move $a2, $zero fld.s $fa2, $t5, 0 @@ -2275,9 +2256,9 @@ partA__align: # @partA__align ld.d $t0, $sp, 120 # 8-byte Folded Reload ld.d $t1, $sp, 144 # 8-byte Folded Reload move $t2, $t5 - b .LBB4_98 + b .LBB4_97 .p2align 4, , 16 -.LBB4_97: # in Loop: Header=BB4_98 Depth=2 +.LBB4_96: # in Loop: Header=BB4_97 Depth=2 fadd.s $fa3, $fa3, $fa4 fcmp.cult.s $fcc0, $fa3, $fa2 fsel $fa2, $fa3, $fa2, $fcc0 @@ -2297,9 +2278,9 @@ partA__align: # @partA__align addi.d $a6, $a6, 4 addi.w $a1, $a1, 1 addi.d $a5, $a5, -1 - beq $t6, $a1, .LBB4_91 -.LBB4_98: # %.lr.ph501 - # Parent Loop BB4_92 Depth=1 + beq $s8, $a1, .LBB4_90 +.LBB4_97: # %.lr.ph501 + # Parent Loop BB4_91 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa4, $t1, 0 fld.s $fa3, $t2, 0 @@ -2307,46 +2288,46 @@ partA__align: # @partA__align fcmp.cule.s $fcc0, $fa4, $fa3 st.w $zero, $a3, 0 fmov.s $fs0, $fa3 - bcnez $fcc0, .LBB4_100 -# %bb.99: # in Loop: Header=BB4_98 Depth=2 + bcnez $fcc0, .LBB4_99 +# %bb.98: # in Loop: Header=BB4_97 Depth=2 add.d $t3, $a2, $a5 st.w $t3, $a3, 0 fmov.s $fs0, $fa4 -.LBB4_100: # in Loop: Header=BB4_98 Depth=2 +.LBB4_99: # in Loop: Header=BB4_97 Depth=2 fld.s $fa5, $t0, 0 fld.s $fa4, $a6, 0 fadd.s $fa6, $fa0, $fa5 fcmp.cule.s $fcc0, $fa6, $fs0 - bcnez $fcc0, .LBB4_102 -# %bb.101: # in Loop: Header=BB4_98 Depth=2 + bcnez $fcc0, .LBB4_101 +# %bb.100: # in Loop: Header=BB4_97 Depth=2 ld.w $t3, $a7, 0 sub.d $t3, $s7, $t3 st.w $t3, $a3, 0 fmov.s $fs0, $fa6 -.LBB4_102: # in Loop: Header=BB4_98 Depth=2 +.LBB4_101: # in Loop: Header=BB4_97 Depth=2 fadd.s $fa6, $fa1, $fa3 fcmp.cult.s $fcc0, $fa6, $fa5 - bcnez $fcc0, .LBB4_97 -# %bb.103: # in Loop: Header=BB4_98 Depth=2 + bcnez $fcc0, .LBB4_96 +# %bb.102: # in Loop: Header=BB4_97 Depth=2 fst.s $fa6, $t0, 0 st.w $s6, $a7, 0 - b .LBB4_97 -.LBB4_104: + b .LBB4_96 +.LBB4_103: movgr2fr.w $fs0, $zero - bnez $t0, .LBB4_120 - b .LBB4_106 -.LBB4_105: # %._crit_edge511.loopexit + bnez $t0, .LBB4_119 + b .LBB4_105 +.LBB4_104: # %._crit_edge511.loopexit ld.d $a0, $sp, 8 # 8-byte Folded Reload ld.w $t0, $a0, 0 move $t3, $s3 ld.d $s6, $sp, 96 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload + ld.d $t7, $sp, 56 # 8-byte Folded Reload ld.d $t4, $sp, 88 # 8-byte Folded Reload - bnez $t0, .LBB4_120 -.LBB4_106: # %.preheader451 + bnez $t0, .LBB4_119 +.LBB4_105: # %.preheader451 ld.d $a0, $sp, 248 # 8-byte Folded Reload - bnez $a0, .LBB4_113 -# %bb.107: # %.lr.ph516 + bnez $a0, .LBB4_112 +# %bb.106: # %.lr.ph516 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 @@ -2355,27 +2336,37 @@ partA__align: # @partA__align addi.d $a2, $a1, -1 ori $a4, $zero, 8 ori $a3, $zero, 1 - bltu $a2, $a4, .LBB4_111 -# %bb.108: # %vector.ph864 + bltu $a2, $a4, .LBB4_110 +# %bb.107: # %vector.ph864 move $a4, $a2 bstrins.d $a4, $zero, 2, 0 ori $a5, $zero, 1 move $a3, $a2 bstrins.d $a3, $a5, 2, 0 xvreplgr2vr.w $xr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI4_2) - xvld $xr1, $a5, %pc_lo12(.LCPI4_2) pcalau12i $a5, %pc_hi20(.LCPI4_3) - xvld $xr2, $a5, %pc_lo12(.LCPI4_3) - xvreplgr2vr.d $xr3, $t4 + xvld $xr1, $a5, %pc_lo12(.LCPI4_3) + pcalau12i $a5, %pc_hi20(.LCPI4_4) + xvld $xr2, $a5, %pc_lo12(.LCPI4_4) + pcalau12i $a5, %pc_hi20(.LCPI4_1) + xvld $xr3, $a5, %pc_lo12(.LCPI4_1) + xvreplgr2vr.d $xr4, $t4 addi.d $a5, $t3, 4 - xvldi $xr4, -800 + xvldi $xr5, -800 move $a6, $a4 .p2align 4, , 16 -.LBB4_109: # %vector.body871 +.LBB4_108: # %vector.body871 # =>This Inner Loop Header: Depth=1 - xvsub.d $xr5, $xr3, $xr1 - xvsub.d $xr6, $xr3, $xr2 + xvsub.d $xr6, $xr4, $xr1 + xvsub.d $xr7, $xr4, $xr2 + xvpickve2gr.d $a7, $xr7, 0 + vinsgr2vr.w $vr8, $a7, 0 + xvpickve2gr.d $a7, $xr7, 1 + vinsgr2vr.w $vr8, $a7, 1 + xvpickve2gr.d $a7, $xr7, 2 + vinsgr2vr.w $vr8, $a7, 2 + xvpickve2gr.d $a7, $xr7, 3 + vinsgr2vr.w $vr8, $a7, 3 xvpickve2gr.d $a7, $xr6, 0 vinsgr2vr.w $vr7, $a7, 0 xvpickve2gr.d $a7, $xr6, 1 @@ -2384,88 +2375,59 @@ partA__align: # @partA__align vinsgr2vr.w $vr7, $a7, 2 xvpickve2gr.d $a7, $xr6, 3 vinsgr2vr.w $vr7, $a7, 3 - xvpickve2gr.d $a7, $xr5, 0 - vinsgr2vr.w $vr6, $a7, 0 - xvpickve2gr.d $a7, $xr5, 1 - vinsgr2vr.w $vr6, $a7, 1 - xvpickve2gr.d $a7, $xr5, 2 - vinsgr2vr.w $vr6, $a7, 2 - xvpickve2gr.d $a7, $xr5, 3 - vinsgr2vr.w $vr6, $a7, 3 - xvpermi.q $xr7, $xr6, 2 - xvmul.w $xr5, $xr0, $xr7 - xvpermi.q $xr6, $xr5, 1 + xvpermi.q $xr8, $xr7, 2 + xvmul.w $xr6, $xr0, $xr8 + xvpermi.q $xr7, $xr6, 1 + vext2xv.d.w $xr7, $xr7 + xvffint.d.l $xr7, $xr7 vext2xv.d.w $xr6, $xr6 + xvld $xr8, $a5, 0 xvffint.d.l $xr6, $xr6 - vext2xv.d.w $xr5, $xr5 - xvld $xr7, $a5, 0 - xvffint.d.l $xr5, $xr5 - xvfmul.d $xr5, $xr5, $xr4 - xvfmul.d $xr6, $xr6, $xr4 - xvpermi.q $xr8, $xr7, 1 - vreplvei.w $vr9, $vr8, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr8, 2 + xvfmul.d $xr6, $xr6, $xr5 + xvfmul.d $xr7, $xr7, $xr5 + xvpermi.q $xr9, $xr8, 1 + vreplvei.w $vr10, $vr9, 3 fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr8, 1 + vreplvei.w $vr11, $vr9, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr9, 1 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr9, $vr9, 0 fcvt.d.s $ft1, $ft1 + vextrins.d $vr9, $vr10, 16 + xvpermi.q $xr9, $xr11, 2 + vreplvei.w $vr10, $vr8, 3 + fcvt.d.s $ft2, $ft2 + vreplvei.w $vr11, $vr8, 2 + fcvt.d.s $ft3, $ft3 + vextrins.d $vr11, $vr10, 16 + vreplvei.w $vr10, $vr8, 1 + fcvt.d.s $ft2, $ft2 vreplvei.w $vr8, $vr8, 0 fcvt.d.s $ft0, $ft0 - vextrins.d $vr8, $vr9, 16 - xvpermi.q $xr8, $xr10, 2 - vreplvei.w $vr9, $vr7, 3 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr10, $vr7, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr9, 16 - vreplvei.w $vr9, $vr7, 1 - fcvt.d.s $ft1, $ft1 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr9, 16 - xvpermi.q $xr7, $xr10, 2 + vextrins.d $vr8, $vr10, 16 + xvpermi.q $xr8, $xr11, 2 + xvfadd.d $xr7, $xr9, $xr7 xvfadd.d $xr6, $xr8, $xr6 - xvfadd.d $xr5, $xr7, $xr5 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpermi.q $xr7, $xr8, 2 - xvst $xr7, $a5, 0 + xvfcvt.s.d $xr6, $xr7, $xr6 + xvperm.w $xr6, $xr6, $xr3 + xvst $xr6, $a5, 0 xvaddi.du $xr2, $xr2, 8 xvaddi.du $xr1, $xr1, 8 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 - bnez $a6, .LBB4_109 -# %bb.110: # %middle.block878 - beq $a2, $a4, .LBB4_113 -.LBB4_111: # %scalar.ph862.preheader + bnez $a6, .LBB4_108 +# %bb.109: # %middle.block878 + beq $a2, $a4, .LBB4_112 +.LBB4_110: # %scalar.ph862.preheader sub.w $a2, $t4, $a3 mul.d $a2, $a0, $a2 alsl.d $a4, $a3, $t3, 2 sub.d $a1, $a1, $a3 vldi $vr0, -800 .p2align 4, , 16 -.LBB4_112: # %scalar.ph862 +.LBB4_111: # %scalar.ph862 # =>This Inner Loop Header: Depth=1 fld.s $fa1, $a4, 0 movgr2fr.w $fa2, $a2 @@ -2478,12 +2440,12 @@ partA__align: # @partA__align sub.w $a2, $a2, $a0 addi.d $a1, $a1, -1 addi.d $a4, $a4, 4 - bnez $a1, .LBB4_112 -.LBB4_113: # %.preheader450 + bnez $a1, .LBB4_111 +.LBB4_112: # %.preheader450 ld.d $a0, $sp, 32 # 8-byte Folded Reload - blez $a0, .LBB4_120 -# %bb.114: # %.lr.ph519 - bstrpick.d $a0, $s0, 31, 0 + blez $a0, .LBB4_119 +# %bb.113: # %.lr.ph519 + bstrpick.d $a0, $t7, 31, 0 pcalau12i $a1, %got_pc_hi20(offset) ld.d $a1, $a1, %got_pc_lo12(offset) ld.w $a1, $a1, 0 @@ -2492,27 +2454,27 @@ partA__align: # @partA__align movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 fneg.d $fa1, $fa1 - addi.d $a0, $s0, 1 + addi.d $a0, $t7, 1 bstrpick.d $a1, $a0, 31, 0 addi.d $a2, $a1, -1 ori $a3, $zero, 4 ori $a0, $zero, 1 - bltu $a2, $a3, .LBB4_118 -# %bb.115: # %vector.ph883 + bltu $a2, $a3, .LBB4_117 +# %bb.114: # %vector.ph883 move $a3, $a2 bstrins.d $a3, $zero, 1, 0 ori $a4, $zero, 1 move $a0, $a2 bstrins.d $a0, $a4, 1, 0 xvreplve0.d $xr2, $xr0 - pcalau12i $a4, %pc_hi20(.LCPI4_4) - vld $vr3, $a4, %pc_lo12(.LCPI4_4) + pcalau12i $a4, %pc_hi20(.LCPI4_5) + vld $vr3, $a4, %pc_lo12(.LCPI4_5) xvreplve0.d $xr4, $xr1 addi.d $a4, $s4, 4 xvldi $xr5, -800 move $a5, $a3 .p2align 4, , 16 -.LBB4_116: # %vector.body890 +.LBB4_115: # %vector.body890 # =>This Inner Loop Header: Depth=1 vext2xv.du.wu $xr6, $xr3 vld $vr7, $a4, 0 @@ -2531,30 +2493,21 @@ partA__align: # @partA__align vextrins.d $vr7, $vr8, 16 xvpermi.q $xr7, $xr9, 2 xvfmadd.d $xr6, $xr4, $xr6, $xr7 - xvpickve.d $xr7, $xr6, 1 - fcvt.s.d $fa7, $fa7 - xvpickve.d $xr8, $xr6, 0 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr8, $vr7, 16 - xvpickve.d $xr7, $xr6, 2 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr8, $vr7, 32 - xvpickve.d $xr6, $xr6, 3 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr8, $vr6, 48 - vst $vr8, $a4, 0 + xvpermi.q $xr7, $xr6, 1 + vfcvt.s.d $vr6, $vr7, $vr6 + vst $vr6, $a4, 0 vaddi.wu $vr3, $vr3, 4 addi.d $a5, $a5, -4 addi.d $a4, $a4, 16 - bnez $a5, .LBB4_116 -# %bb.117: # %middle.block897 - beq $a2, $a3, .LBB4_120 -.LBB4_118: # %scalar.ph881.preheader + bnez $a5, .LBB4_115 +# %bb.116: # %middle.block897 + beq $a2, $a3, .LBB4_119 +.LBB4_117: # %scalar.ph881.preheader alsl.d $a2, $a0, $s4, 2 sub.d $a1, $a1, $a0 vldi $vr2, -800 .p2align 4, , 16 -.LBB4_119: # %scalar.ph881 +.LBB4_118: # %scalar.ph881 # =>This Inner Loop Header: Depth=1 bstrpick.d $a3, $a0, 31, 0 movgr2fr.d $fa3, $a3 @@ -2569,9 +2522,9 @@ partA__align: # @partA__align addi.w $a0, $a0, 1 addi.d $a1, $a1, -1 addi.d $a2, $a2, 4 - bnez $a1, .LBB4_119 -.LBB4_120: # %.loopexit - st.d $t0, $sp, 264 # 8-byte Folded Spill + bnez $a1, .LBB4_118 +.LBB4_119: # %.loopexit + move $s5, $t0 move $s0, $t3 ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $s3, $a0, %pc_lo12(partA__align.mseq1) @@ -2579,7 +2532,7 @@ partA__align: # @partA__align ld.d $s7, $a0, %pc_lo12(partA__align.mseq2) ld.d $a0, $sp, 40 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partA__align.ijp) - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 280 # 8-byte Folded Spill ld.d $a0, $s2, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 @@ -2588,23 +2541,20 @@ partA__align: # @partA__align move $a0, $a1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - addi.w $a1, $s8, 0 - st.d $a1, $sp, 280 # 8-byte Folded Spill + addi.w $t1, $s8, 0 addi.w $a3, $a0, 0 add.w $a1, $a0, $s8 ld.d $a2, $sp, 64 # 8-byte Folded Reload - beqz $a2, .LBB4_127 -# %bb.121: + beqz $a2, .LBB4_126 +# %bb.120: ori $a4, $zero, 1 - ld.d $ra, $sp, 272 # 8-byte Folded Reload - ld.d $a5, $sp, 264 # 8-byte Folded Reload - beq $a5, $a4, .LBB4_138 -# %bb.122: + ld.d $ra, $sp, 280 # 8-byte Folded Reload + beq $s5, $a4, .LBB4_137 +# %bb.121: move $a2, $s0 fld.s $fa0, $s4, 0 - ld.d $a4, $sp, 280 # 8-byte Folded Reload - blez $a4, .LBB4_133 -# %bb.123: # %.lr.ph.i348 + blez $t1, .LBB4_132 +# %bb.122: # %.lr.ph.i348 slli.d $a4, $s8, 3 bstrpick.d $a4, $a4, 33, 3 slli.d $a4, $a4, 3 @@ -2612,33 +2562,31 @@ partA__align: # @partA__align srai.d $a5, $a5, 30 bstrpick.d $a6, $s8, 30, 0 move $a7, $s8 - b .LBB4_125 + b .LBB4_124 .p2align 4, , 16 -.LBB4_124: # in Loop: Header=BB4_125 Depth=1 +.LBB4_123: # in Loop: Header=BB4_124 Depth=1 addi.d $a6, $a6, -1 addi.d $a7, $a7, -1 addi.d $s4, $s4, 4 - beqz $a6, .LBB4_133 -.LBB4_125: # =>This Inner Loop Header: Depth=1 + beqz $a6, .LBB4_132 +.LBB4_124: # =>This Inner Loop Header: Depth=1 fld.s $fa1, $s4, 0 fcmp.cult.s $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB4_124 -# %bb.126: # in Loop: Header=BB4_125 Depth=1 + bcnez $fcc0, .LBB4_123 +# %bb.125: # in Loop: Header=BB4_124 Depth=1 ldx.d $t0, $ra, $a4 stx.w $a7, $t0, $a5 fmov.s $fa0, $fa1 - b .LBB4_124 -.LBB4_127: + b .LBB4_123 +.LBB4_126: ori $a2, $zero, 1 - ld.d $s5, $sp, 272 # 8-byte Folded Reload - ld.d $a4, $sp, 264 # 8-byte Folded Reload - beq $a4, $a2, .LBB4_156 -# %bb.128: + ld.d $ra, $sp, 280 # 8-byte Folded Reload + beq $s5, $a2, .LBB4_155 +# %bb.127: move $t0, $s0 fld.s $fa0, $s4, 0 - ld.d $a2, $sp, 280 # 8-byte Folded Reload - blez $a2, .LBB4_151 -# %bb.129: # %.lr.ph.i441 + blez $t1, .LBB4_150 +# %bb.128: # %.lr.ph.i441 slli.d $a2, $s8, 3 bstrpick.d $a2, $a2, 33, 3 slli.d $a4, $a2, 3 @@ -2646,25 +2594,25 @@ partA__align: # @partA__align srai.d $a5, $a2, 30 bstrpick.d $a6, $s8, 30, 0 move $a7, $s8 - b .LBB4_131 + b .LBB4_130 .p2align 4, , 16 -.LBB4_130: # in Loop: Header=BB4_131 Depth=1 +.LBB4_129: # in Loop: Header=BB4_130 Depth=1 addi.d $a6, $a6, -1 addi.d $a7, $a7, -1 addi.d $s4, $s4, 4 - beqz $a6, .LBB4_151 -.LBB4_131: # =>This Inner Loop Header: Depth=1 + beqz $a6, .LBB4_150 +.LBB4_130: # =>This Inner Loop Header: Depth=1 fld.s $fa1, $s4, 0 fcmp.cult.s $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB4_130 -# %bb.132: # in Loop: Header=BB4_131 Depth=1 - ldx.d $a2, $s5, $a4 + bcnez $fcc0, .LBB4_129 +# %bb.131: # in Loop: Header=BB4_130 Depth=1 + ldx.d $a2, $ra, $a4 stx.w $a7, $a2, $a5 fmov.s $fa0, $fa1 - b .LBB4_130 -.LBB4_133: # %.preheader9.i - blez $a3, .LBB4_138 -# %bb.134: # %.lr.ph14.i + b .LBB4_129 +.LBB4_132: # %.preheader9.i + blez $a3, .LBB4_137 +# %bb.133: # %.lr.ph14.i slli.d $a4, $s8, 32 srai.d $a4, $a4, 29 slli.d $a5, $a0, 2 @@ -2672,42 +2620,42 @@ partA__align: # @partA__align slli.d $a5, $a5, 2 bstrpick.d $a6, $a0, 30, 0 sub.d $a7, $zero, $a0 - b .LBB4_136 + b .LBB4_135 .p2align 4, , 16 -.LBB4_135: # in Loop: Header=BB4_136 Depth=1 +.LBB4_134: # in Loop: Header=BB4_135 Depth=1 addi.d $a6, $a6, -1 addi.d $a7, $a7, 1 addi.d $a2, $a2, 4 - beqz $a6, .LBB4_138 -.LBB4_136: # =>This Inner Loop Header: Depth=1 + beqz $a6, .LBB4_137 +.LBB4_135: # =>This Inner Loop Header: Depth=1 fld.s $fa1, $a2, 0 fcmp.cult.s $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB4_135 -# %bb.137: # in Loop: Header=BB4_136 Depth=1 + bcnez $fcc0, .LBB4_134 +# %bb.136: # in Loop: Header=BB4_135 Depth=1 ldx.d $t0, $ra, $a4 stx.w $a7, $t0, $a5 fmov.s $fa0, $fa1 - b .LBB4_135 -.LBB4_138: # %.loopexit.i - ld.d $a2, $sp, 280 # 8-byte Folded Reload - bltz $a2, .LBB4_146 -# %bb.139: # %.lr.ph17.preheader.i + b .LBB4_134 +.LBB4_137: # %.loopexit.i + move $s5, $t1 + bltz $t1, .LBB4_145 +# %bb.138: # %.lr.ph17.preheader.i addi.d $a5, $s8, 1 bstrpick.d $a4, $a5, 31, 0 ori $a6, $zero, 4 - bgeu $a4, $a6, .LBB4_141 -# %bb.140: + bgeu $a4, $a6, .LBB4_140 +# %bb.139: move $a5, $zero - b .LBB4_144 -.LBB4_141: # %vector.ph951 - pcalau12i $a6, %pc_hi20(.LCPI4_5) - vld $vr0, $a6, %pc_lo12(.LCPI4_5) + b .LBB4_143 +.LBB4_140: # %vector.ph951 + pcalau12i $a6, %pc_hi20(.LCPI4_6) + vld $vr0, $a6, %pc_lo12(.LCPI4_6) bstrpick.d $a5, $a5, 31, 2 slli.d $a5, $a5, 2 addi.d $a6, $ra, 16 move $a7, $a5 .p2align 4, , 16 -.LBB4_142: # %vector.body954 +.LBB4_141: # %vector.body954 # =>This Inner Loop Header: Depth=1 vld $vr1, $a6, -16 vld $vr2, $a6, 0 @@ -2725,42 +2673,42 @@ partA__align: # @partA__align vaddi.du $vr0, $vr0, 4 addi.d $a7, $a7, -4 addi.d $a6, $a6, 32 - bnez $a7, .LBB4_142 -# %bb.143: # %middle.block962 - beq $a4, $a5, .LBB4_146 -.LBB4_144: # %.lr.ph17.i.preheader + bnez $a7, .LBB4_141 +# %bb.142: # %middle.block962 + beq $a4, $a5, .LBB4_145 +.LBB4_143: # %.lr.ph17.i.preheader alsl.d $a6, $a5, $ra, 3 sub.d $a4, $a4, $a5 addi.d $a5, $a5, 1 .p2align 4, , 16 -.LBB4_145: # %.lr.ph17.i +.LBB4_144: # %.lr.ph17.i # =>This Inner Loop Header: Depth=1 ld.d $a7, $a6, 0 st.w $a5, $a7, 0 addi.d $a6, $a6, 8 addi.d $a4, $a4, -1 addi.d $a5, $a5, 1 - bnez $a4, .LBB4_145 -.LBB4_146: # %.preheader8.i - bltz $a3, .LBB4_195 -# %bb.147: # %iter.check968 + bnez $a4, .LBB4_144 +.LBB4_145: # %.preheader8.i + bltz $a3, .LBB4_194 +# %bb.146: # %iter.check968 ld.d $a3, $ra, 0 addi.d $a6, $a0, 1 bstrpick.d $a4, $a6, 31, 0 ori $a5, $zero, 4 - bgeu $a4, $a5, .LBB4_149 -# %bb.148: + bgeu $a4, $a5, .LBB4_148 +# %bb.147: move $a5, $zero - b .LBB4_193 -.LBB4_149: # %vector.main.loop.iter.check970 + b .LBB4_192 +.LBB4_148: # %vector.main.loop.iter.check970 ori $a5, $zero, 16 - bgeu $a4, $a5, .LBB4_186 -# %bb.150: + bgeu $a4, $a5, .LBB4_185 +# %bb.149: move $a5, $zero - b .LBB4_190 -.LBB4_151: # %.preheader9.i353 - blez $a3, .LBB4_156 -# %bb.152: # %.lr.ph14.i433 + b .LBB4_189 +.LBB4_150: # %.preheader9.i353 + blez $a3, .LBB4_155 +# %bb.151: # %.lr.ph14.i433 slli.d $a2, $s8, 32 srai.d $a4, $a2, 29 slli.d $a2, $a0, 2 @@ -2768,288 +2716,207 @@ partA__align: # @partA__align slli.d $a5, $a2, 2 bstrpick.d $a6, $a0, 30, 0 sub.d $a7, $zero, $a0 - b .LBB4_154 + b .LBB4_153 .p2align 4, , 16 -.LBB4_153: # in Loop: Header=BB4_154 Depth=1 +.LBB4_152: # in Loop: Header=BB4_153 Depth=1 addi.d $a6, $a6, -1 addi.d $a7, $a7, 1 addi.d $t0, $t0, 4 - beqz $a6, .LBB4_156 -.LBB4_154: # =>This Inner Loop Header: Depth=1 + beqz $a6, .LBB4_155 +.LBB4_153: # =>This Inner Loop Header: Depth=1 fld.s $fa1, $t0, 0 fcmp.cult.s $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB4_153 -# %bb.155: # in Loop: Header=BB4_154 Depth=1 - ldx.d $a2, $s5, $a4 + bcnez $fcc0, .LBB4_152 +# %bb.154: # in Loop: Header=BB4_153 Depth=1 + ldx.d $a2, $ra, $a4 stx.w $a7, $a2, $a5 fmov.s $fa0, $fa1 - b .LBB4_153 -.LBB4_156: # %.loopexit.i355 - ld.d $a2, $sp, 280 # 8-byte Folded Reload - bltz $a2, .LBB4_181 -# %bb.157: # %.lr.ph17.preheader.i357 + b .LBB4_152 +.LBB4_155: # %.loopexit.i355 + bltz $t1, .LBB4_180 +# %bb.156: # %.lr.ph17.preheader.i357 addi.d $a4, $s8, 1 bstrpick.d $a2, $a4, 31, 0 ori $a5, $zero, 4 - bgeu $a2, $a5, .LBB4_176 -# %bb.158: + bgeu $a2, $a5, .LBB4_175 +# %bb.157: move $a4, $zero - b .LBB4_179 -.LBB4_159: # %vector.ph660 + b .LBB4_178 +.LBB4_158: # %vector.ph660 bstrpick.d $a3, $a7, 30, 3 slli.d $a3, $a3, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a4, %pc_hi20(.LCPI4_1) + xvld $xr1, $a4, %pc_lo12(.LCPI4_1) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 .p2align 4, , 16 -.LBB4_160: # %vector.body663 +.LBB4_159: # %vector.body663 # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a4, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a4, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a5, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a4, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a5, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a4, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a5, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a5, 0 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 addi.d $a4, $a4, 32 - bnez $a6, .LBB4_160 -# %bb.161: # %middle.block668 - beq $a1, $a3, .LBB4_40 - b .LBB4_38 -.LBB4_162: # %vector.ph679 + bnez $a6, .LBB4_159 +# %bb.160: # %middle.block668 + beq $a1, $a3, .LBB4_39 + b .LBB4_37 +.LBB4_161: # %vector.ph679 bstrpick.d $a3, $t0, 30, 3 slli.d $a3, $a3, 3 - xvreplve0.d $xr1, $xr0 - xvldi $xr2, -912 - xvldi $xr3, -928 + pcalau12i $a4, %pc_hi20(.LCPI4_1) + xvld $xr1, $a4, %pc_lo12(.LCPI4_1) + xvreplve0.d $xr2, $xr0 + xvldi $xr3, -912 + xvldi $xr4, -928 move $a4, $a0 move $a5, $a2 move $a6, $a3 .p2align 4, , 16 -.LBB4_163: # %vector.body684 +.LBB4_162: # %vector.body684 # =>This Inner Loop Header: Depth=1 - xvld $xr4, $a4, 0 - xvpermi.q $xr5, $xr4, 1 - vreplvei.w $vr6, $vr5, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 + xvld $xr5, $a4, 0 + xvpermi.q $xr6, $xr5, 1 + vreplvei.w $vr7, $vr6, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr5, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr5, $vr5, 0 - fcvt.d.s $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - vreplvei.w $vr6, $vr4, 3 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vreplvei.w $vr8, $vr6, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr6, 1 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 - xvfsub.d $xr5, $xr2, $xr5 - xvfsub.d $xr4, $xr2, $xr4 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr1 - xvfmul.d $xr4, $xr4, $xr1 - xvpickve.d $xr6, $xr5, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr5, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr5, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr5, $xr5, 3 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr7, $vr5, 48 - xvpickve.d $xr5, $xr4, 1 - fcvt.s.d $fa5, $fa5 - xvpickve.d $xr6, $xr4, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr5, 16 - xvpickve.d $xr5, $xr4, 2 - fcvt.s.d $fa5, $fa5 - vextrins.w $vr6, $vr5, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - xvld $xr5, $a5, 0 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a4, 0 - xvpermi.q $xr4, $xr5, 1 - vreplvei.w $vr6, $vr4, 3 + vreplvei.w $vr6, $vr6, 0 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr4, 2 + vextrins.d $vr6, $vr7, 16 + xvpermi.q $xr6, $xr8, 2 + vreplvei.w $vr7, $vr5, 3 fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vreplvei.w $vr6, $vr4, 1 - fcvt.d.s $fa6, $fa6 - vreplvei.w $vr4, $vr4, 0 - fcvt.d.s $fa4, $fa4 - vextrins.d $vr4, $vr6, 16 - xvpermi.q $xr4, $xr7, 2 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr7, 16 + vreplvei.w $vr7, $vr5, 1 + fcvt.d.s $fa7, $fa7 + vreplvei.w $vr5, $vr5, 0 + fcvt.d.s $fa5, $fa5 + vextrins.d $vr5, $vr7, 16 + xvpermi.q $xr5, $xr8, 2 + xvfsub.d $xr6, $xr3, $xr6 + xvfsub.d $xr5, $xr3, $xr5 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr2 + xvfmul.d $xr5, $xr5, $xr2 + xvld $xr7, $a5, 0 + xvfcvt.s.d $xr5, $xr6, $xr5 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a4, 0 + xvpermi.q $xr5, $xr7, 1 vreplvei.w $vr6, $vr5, 3 fcvt.d.s $fa6, $fa6 - vreplvei.w $vr7, $vr5, 2 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 + vreplvei.w $vr8, $vr5, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 vreplvei.w $vr6, $vr5, 1 fcvt.d.s $fa6, $fa6 vreplvei.w $vr5, $vr5, 0 fcvt.d.s $fa5, $fa5 vextrins.d $vr5, $vr6, 16 - xvpermi.q $xr5, $xr7, 2 - xvfsub.d $xr4, $xr2, $xr4 - xvfsub.d $xr5, $xr2, $xr5 - xvfmul.d $xr4, $xr4, $xr3 - xvfmul.d $xr5, $xr5, $xr3 - xvfmul.d $xr4, $xr4, $xr1 - xvfmul.d $xr5, $xr5, $xr1 - xvpickve.d $xr6, $xr4, 1 - fcvt.s.d $fa6, $fa6 - xvpickve.d $xr7, $xr4, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr6, 16 - xvpickve.d $xr6, $xr4, 2 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr7, $vr6, 32 - xvpickve.d $xr4, $xr4, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr7, $vr4, 48 - xvpickve.d $xr4, $xr5, 1 - fcvt.s.d $fa4, $fa4 - xvpickve.d $xr6, $xr5, 0 - fcvt.s.d $fa6, $fa6 - vextrins.w $vr6, $vr4, 16 - xvpickve.d $xr4, $xr5, 2 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 32 - xvpickve.d $xr4, $xr5, 3 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr6, $vr4, 48 - xvpermi.q $xr6, $xr7, 2 - xvst $xr6, $a5, 0 + xvpermi.q $xr5, $xr8, 2 + vreplvei.w $vr6, $vr7, 3 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr8, $vr7, 2 + fcvt.d.s $ft0, $ft0 + vextrins.d $vr8, $vr6, 16 + vreplvei.w $vr6, $vr7, 1 + fcvt.d.s $fa6, $fa6 + vreplvei.w $vr7, $vr7, 0 + fcvt.d.s $fa7, $fa7 + vextrins.d $vr7, $vr6, 16 + xvpermi.q $xr7, $xr8, 2 + xvfsub.d $xr5, $xr3, $xr5 + xvfsub.d $xr6, $xr3, $xr7 + xvfmul.d $xr5, $xr5, $xr4 + xvfmul.d $xr6, $xr6, $xr4 + xvfmul.d $xr5, $xr5, $xr2 + xvfmul.d $xr6, $xr6, $xr2 + xvfcvt.s.d $xr5, $xr5, $xr6 + xvperm.w $xr5, $xr5, $xr1 + xvst $xr5, $a5, 0 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 addi.d $a4, $a4, 32 - bnez $a6, .LBB4_163 -# %bb.164: # %middle.block689 - beq $a1, $a3, .LBB4_47 - b .LBB4_45 -.LBB4_165: # %vector.ph645 + bnez $a6, .LBB4_162 +# %bb.163: # %middle.block689 + beq $a1, $a3, .LBB4_46 + b .LBB4_44 +.LBB4_164: # %vector.ph645 slli.d $a4, $s1, 3 bstrpick.d $a2, $fp, 30, 3 slli.d $a2, $a2, 3 @@ -3058,7 +2925,7 @@ partA__align: # @partA__align addi.d $a4, $a4, 32 move $a5, $a2 .p2align 4, , 16 -.LBB4_166: # %vector.body648 +.LBB4_165: # %vector.body648 # =>This Inner Loop Header: Depth=1 xvld $xr0, $a4, -32 xvld $xr1, $a4, 0 @@ -3067,11 +2934,11 @@ partA__align: # @partA__align addi.d $a5, $a5, -8 addi.d $a3, $a3, 64 addi.d $a4, $a4, 64 - bnez $a5, .LBB4_166 -# %bb.167: # %middle.block653 + bnez $a5, .LBB4_165 +# %bb.166: # %middle.block653 beq $a2, $fp, .LBB4_23 b .LBB4_21 -.LBB4_168: # %vector.memcheck723 +.LBB4_167: # %vector.memcheck723 ld.d $a6, $sp, 280 # 8-byte Folded Reload addi.d $a5, $a6, 4 alsl.d $a6, $a1, $a6, 2 @@ -3079,50 +2946,50 @@ partA__align: # @partA__align sltu $a7, $a5, $a7 sltu $t0, $a0, $a6 and $a7, $a7, $t0 - bnez $a7, .LBB4_58 -# %bb.169: # %vector.memcheck723 + bnez $a7, .LBB4_57 +# %bb.168: # %vector.memcheck723 alsl.d $a7, $a1, $a2, 2 addi.d $a7, $a7, -4 sltu $a5, $a5, $a7 sltu $a6, $a2, $a6 and $a5, $a5, $a6 - bnez $a5, .LBB4_58 -# %bb.170: # %vector.main.loop.iter.check + bnez $a5, .LBB4_57 +# %bb.169: # %vector.main.loop.iter.check ori $a4, $zero, 16 - bgeu $a3, $a4, .LBB4_296 -# %bb.171: + bgeu $a3, $a4, .LBB4_295 +# %bb.170: move $a5, $zero - b .LBB4_300 -.LBB4_172: # %vector.memcheck763 + b .LBB4_299 +.LBB4_171: # %vector.memcheck763 addi.d $a5, $t3, 4 alsl.d $a6, $a1, $t3, 2 addi.d $a7, $a0, 4 sltu $a7, $a5, $a7 sltu $t0, $a0, $a6 and $a7, $a7, $t0 - bnez $a7, .LBB4_62 -# %bb.173: # %vector.memcheck763 + bnez $a7, .LBB4_61 +# %bb.172: # %vector.memcheck763 alsl.d $a7, $a1, $a2, 2 addi.d $a7, $a7, -4 sltu $a5, $a5, $a7 sltu $a6, $a2, $a6 and $a5, $a5, $a6 - bnez $a5, .LBB4_62 -# %bb.174: # %vector.main.loop.iter.check780 + bnez $a5, .LBB4_61 +# %bb.173: # %vector.main.loop.iter.check780 ori $a4, $zero, 16 - bgeu $a3, $a4, .LBB4_303 -# %bb.175: + bgeu $a3, $a4, .LBB4_302 +# %bb.174: move $a5, $zero - b .LBB4_307 -.LBB4_176: # %vector.ph902 - pcalau12i $a5, %pc_hi20(.LCPI4_5) - vld $vr0, $a5, %pc_lo12(.LCPI4_5) + b .LBB4_306 +.LBB4_175: # %vector.ph902 + pcalau12i $a5, %pc_hi20(.LCPI4_6) + vld $vr0, $a5, %pc_lo12(.LCPI4_6) bstrpick.d $a4, $a4, 31, 2 slli.d $a4, $a4, 2 - addi.d $a5, $s5, 16 + addi.d $a5, $ra, 16 move $a6, $a4 .p2align 4, , 16 -.LBB4_177: # %vector.body905 +.LBB4_176: # %vector.body905 # =>This Inner Loop Header: Depth=1 vld $vr1, $a5, -16 vld $vr2, $a5, 0 @@ -3140,51 +3007,51 @@ partA__align: # @partA__align vaddi.du $vr0, $vr0, 4 addi.d $a6, $a6, -4 addi.d $a5, $a5, 32 - bnez $a6, .LBB4_177 -# %bb.178: # %middle.block912 - beq $a2, $a4, .LBB4_181 -.LBB4_179: # %.lr.ph17.i359.preheader - alsl.d $a5, $a4, $s5, 3 + bnez $a6, .LBB4_176 +# %bb.177: # %middle.block912 + beq $a2, $a4, .LBB4_180 +.LBB4_178: # %.lr.ph17.i359.preheader + alsl.d $a5, $a4, $ra, 3 sub.d $a2, $a2, $a4 addi.d $a4, $a4, 1 .p2align 4, , 16 -.LBB4_180: # %.lr.ph17.i359 +.LBB4_179: # %.lr.ph17.i359 # =>This Inner Loop Header: Depth=1 ld.d $a6, $a5, 0 st.w $a4, $a6, 0 addi.d $a5, $a5, 8 addi.d $a2, $a2, -1 addi.d $a4, $a4, 1 - bnez $a2, .LBB4_180 -.LBB4_181: # %.preheader8.i363 - bltz $a3, .LBB4_250 -# %bb.182: # %iter.check918 - ld.d $a2, $s5, 0 + bnez $a2, .LBB4_179 +.LBB4_180: # %.preheader8.i363 + bltz $a3, .LBB4_249 +# %bb.181: # %iter.check918 + ld.d $a2, $ra, 0 addi.d $a5, $a0, 1 bstrpick.d $a3, $a5, 31, 0 ori $a4, $zero, 4 - bgeu $a3, $a4, .LBB4_184 -# %bb.183: + bgeu $a3, $a4, .LBB4_183 +# %bb.182: move $a4, $zero - b .LBB4_248 -.LBB4_184: # %vector.main.loop.iter.check920 + b .LBB4_247 +.LBB4_183: # %vector.main.loop.iter.check920 ori $a4, $zero, 16 - bgeu $a3, $a4, .LBB4_241 -# %bb.185: + bgeu $a3, $a4, .LBB4_240 +# %bb.184: move $a4, $zero - b .LBB4_245 -.LBB4_186: # %vector.ph971 + b .LBB4_244 +.LBB4_185: # %vector.ph971 andi $a7, $a6, 12 bstrpick.d $a5, $a6, 31, 4 slli.d $a5, $a5, 4 - pcalau12i $t0, %pc_hi20(.LCPI4_6) - xvld $xr0, $t0, %pc_lo12(.LCPI4_6) + pcalau12i $t0, %pc_hi20(.LCPI4_7) + xvld $xr0, $t0, %pc_lo12(.LCPI4_7) addi.d $t0, $a3, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 move $t1, $a5 .p2align 4, , 16 -.LBB4_187: # %vector.body974 +.LBB4_186: # %vector.body974 # =>This Inner Loop Header: Depth=1 xvxor.v $xr3, $xr0, $xr1 xvsub.w $xr4, $xr2, $xr0 @@ -3193,15 +3060,15 @@ partA__align: # @partA__align xvaddi.wu $xr0, $xr0, 16 addi.d $t1, $t1, -16 addi.d $t0, $t0, 64 - bnez $t1, .LBB4_187 -# %bb.188: # %middle.block980 - beq $a4, $a5, .LBB4_195 -# %bb.189: # %vec.epilog.iter.check984 - beqz $a7, .LBB4_193 -.LBB4_190: # %vec.epilog.ph986 + bnez $t1, .LBB4_186 +# %bb.187: # %middle.block980 + beq $a4, $a5, .LBB4_194 +# %bb.188: # %vec.epilog.iter.check984 + beqz $a7, .LBB4_192 +.LBB4_189: # %vec.epilog.ph986 move $a7, $a5 - pcalau12i $a5, %pc_hi20(.LCPI4_7) - vld $vr0, $a5, %pc_lo12(.LCPI4_7) + pcalau12i $a5, %pc_hi20(.LCPI4_8) + vld $vr0, $a5, %pc_lo12(.LCPI4_8) bstrpick.d $a5, $a6, 31, 2 slli.d $a5, $a5, 2 vreplgr2vr.w $vr1, $a7 @@ -3210,63 +3077,63 @@ partA__align: # @partA__align alsl.d $a7, $a7, $a3, 2 vrepli.b $vr1, -1 .p2align 4, , 16 -.LBB4_191: # %vec.epilog.vector.body992 +.LBB4_190: # %vec.epilog.vector.body992 # =>This Inner Loop Header: Depth=1 vxor.v $vr2, $vr0, $vr1 vst $vr2, $a7, 0 vaddi.wu $vr0, $vr0, 4 addi.d $a6, $a6, 4 addi.d $a7, $a7, 16 - bnez $a6, .LBB4_191 -# %bb.192: # %vec.epilog.middle.block997 - beq $a4, $a5, .LBB4_195 -.LBB4_193: # %vec.epilog.scalar.ph983.preheader + bnez $a6, .LBB4_190 +# %bb.191: # %vec.epilog.middle.block997 + beq $a4, $a5, .LBB4_194 +.LBB4_192: # %vec.epilog.scalar.ph983.preheader alsl.d $a3, $a5, $a3, 2 nor $a6, $a5, $zero sub.d $a4, $a4, $a5 .p2align 4, , 16 -.LBB4_194: # %vec.epilog.scalar.ph983 +.LBB4_193: # %vec.epilog.scalar.ph983 # =>This Inner Loop Header: Depth=1 st.w $a6, $a3, 0 addi.d $a3, $a3, 4 addi.d $a4, $a4, -1 addi.d $a6, $a6, -1 - bnez $a4, .LBB4_194 -.LBB4_195: # %.preheader7.i - blez $s1, .LBB4_198 -# %bb.196: # %.lr.ph22.i + bnez $a4, .LBB4_193 +.LBB4_194: # %.preheader7.i + blez $s1, .LBB4_197 +# %bb.195: # %.lr.ph22.i move $a3, $s3 move $a4, $s1 .p2align 4, , 16 -.LBB4_197: # =>This Inner Loop Header: Depth=1 +.LBB4_196: # =>This Inner Loop Header: Depth=1 ld.d $a5, $a3, 0 add.d $a6, $a5, $a1 st.d $a6, $a3, 0 stx.b $zero, $a5, $a1 addi.d $a4, $a4, -1 addi.d $a3, $a3, 8 - bnez $a4, .LBB4_197 -.LBB4_198: # %.preheader6.i + bnez $a4, .LBB4_196 +.LBB4_197: # %.preheader6.i ld.d $a2, $sp, 384 st.d $a2, $sp, 264 # 8-byte Folded Spill - blez $fp, .LBB4_201 -# %bb.199: # %.lr.ph24.i + blez $fp, .LBB4_200 +# %bb.198: # %.lr.ph24.i move $a4, $s7 move $a5, $fp .p2align 4, , 16 -.LBB4_200: # =>This Inner Loop Header: Depth=1 +.LBB4_199: # =>This Inner Loop Header: Depth=1 ld.d $a6, $a4, 0 add.d $a7, $a6, $a1 st.d $a7, $a4, 0 stx.b $zero, $a6, $a1 addi.d $a5, $a5, -1 addi.d $a4, $a4, 8 - bnez $a5, .LBB4_200 -.LBB4_201: # %._crit_edge.i + bnez $a5, .LBB4_199 +.LBB4_200: # %._crit_edge.i ld.d $a2, $sp, 264 # 8-byte Folded Reload st.w $zero, $a2, 0 - bltz $a1, .LBB4_286 -# %bb.202: # %.lr.ph53.i + bltz $a1, .LBB4_285 +# %bb.201: # %.lr.ph53.i move $a4, $zero pcalau12i $a5, %pc_hi20(impmtx) ld.d $a2, $a5, %pc_lo12(impmtx) @@ -3276,68 +3143,68 @@ partA__align: # @partA__align addi.w $t0, $zero, -1 ori $t1, $zero, 45 move $t2, $a0 - b .LBB4_204 + b .LBB4_203 .p2align 4, , 16 -.LBB4_203: # %._crit_edge47.i - # in Loop: Header=BB4_204 Depth=1 +.LBB4_202: # %._crit_edge47.i + # in Loop: Header=BB4_203 Depth=1 addi.w $a4, $a4, 2 move $s8, $t3 - blt $a1, $a4, .LBB4_286 -.LBB4_204: # =>This Loop Header: Depth=1 - # Child Loop BB4_211 Depth 2 - # Child Loop BB4_212 Depth 3 - # Child Loop BB4_214 Depth 3 - # Child Loop BB4_219 Depth 2 - # Child Loop BB4_220 Depth 3 - # Child Loop BB4_222 Depth 3 - # Child Loop BB4_230 Depth 2 - # Child Loop BB4_233 Depth 2 + blt $a1, $a4, .LBB4_285 +.LBB4_203: # =>This Loop Header: Depth=1 + # Child Loop BB4_210 Depth 2 + # Child Loop BB4_211 Depth 3 + # Child Loop BB4_213 Depth 3 + # Child Loop BB4_218 Depth 2 + # Child Loop BB4_219 Depth 3 + # Child Loop BB4_221 Depth 3 + # Child Loop BB4_229 Depth 2 + # Child Loop BB4_232 Depth 2 addi.w $t4, $s8, 0 slli.d $t3, $t4, 3 ldx.d $t3, $ra, $t3 addi.w $t5, $t2, 0 slli.d $t6, $t5, 2 ldx.w $t7, $t3, $t6 - bltz $t7, .LBB4_207 -# %bb.205: # in Loop: Header=BB4_204 Depth=1 - beqz $t7, .LBB4_208 -# %bb.206: # in Loop: Header=BB4_204 Depth=1 + bltz $t7, .LBB4_206 +# %bb.204: # in Loop: Header=BB4_203 Depth=1 + beqz $t7, .LBB4_207 +# %bb.205: # in Loop: Header=BB4_203 Depth=1 sub.w $t3, $s8, $t7 - b .LBB4_209 + b .LBB4_208 .p2align 4, , 16 -.LBB4_207: # in Loop: Header=BB4_204 Depth=1 +.LBB4_206: # in Loop: Header=BB4_203 Depth=1 addi.w $t3, $s8, -1 nor $t8, $t3, $zero add.w $s0, $s8, $t8 - bnez $s0, .LBB4_211 - b .LBB4_216 + bnez $s0, .LBB4_210 + b .LBB4_215 .p2align 4, , 16 -.LBB4_208: # in Loop: Header=BB4_204 Depth=1 +.LBB4_207: # in Loop: Header=BB4_203 Depth=1 addi.w $t3, $s8, -1 -.LBB4_209: # in Loop: Header=BB4_204 Depth=1 +.LBB4_208: # in Loop: Header=BB4_203 Depth=1 move $t7, $t0 nor $t8, $t3, $zero add.w $s0, $s8, $t8 - bnez $s0, .LBB4_211 - b .LBB4_216 + bnez $s0, .LBB4_210 + b .LBB4_215 .p2align 4, , 16 -.LBB4_210: # %._crit_edge29.i - # in Loop: Header=BB4_211 Depth=2 +.LBB4_209: # %._crit_edge29.i + # in Loop: Header=BB4_210 Depth=2 addi.d $s0, $s0, -1 - beqz $s0, .LBB4_215 -.LBB4_211: # %.preheader3.i - # Parent Loop BB4_204 Depth=1 + beqz $s0, .LBB4_214 +.LBB4_210: # %.preheader3.i + # Parent Loop BB4_203 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB4_212 Depth 3 - # Child Loop BB4_214 Depth 3 + # Child Loop BB4_211 Depth 3 + # Child Loop BB4_213 Depth 3 move $s4, $s2 move $s6, $s3 move $ra, $a6 - blez $s1, .LBB4_213 + blez $s1, .LBB4_212 .p2align 4, , 16 -.LBB4_212: # %.lr.ph26.i - # Parent Loop BB4_204 Depth=1 - # Parent Loop BB4_211 Depth=2 +.LBB4_211: # %.lr.ph26.i + # Parent Loop BB4_203 Depth=1 + # Parent Loop BB4_210 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $a2, $s4, 0 ld.d $a5, $s6, 0 @@ -3349,16 +3216,16 @@ partA__align: # @partA__align addi.d $ra, $ra, -1 addi.d $s6, $s6, 8 addi.d $s4, $s4, 8 - bnez $ra, .LBB4_212 -.LBB4_213: # %.preheader2.i - # in Loop: Header=BB4_211 Depth=2 + bnez $ra, .LBB4_211 +.LBB4_212: # %.preheader2.i + # in Loop: Header=BB4_210 Depth=2 move $s4, $s7 move $s6, $a7 - blez $fp, .LBB4_210 + blez $fp, .LBB4_209 .p2align 4, , 16 -.LBB4_214: # %.lr.ph28.i - # Parent Loop BB4_204 Depth=1 - # Parent Loop BB4_211 Depth=2 +.LBB4_213: # %.lr.ph28.i + # Parent Loop BB4_203 Depth=1 + # Parent Loop BB4_210 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $a2, $s4, 0 addi.d $a3, $a2, -1 @@ -3366,41 +3233,41 @@ partA__align: # @partA__align st.b $t1, $a2, -1 addi.d $s6, $s6, -1 addi.d $s4, $s4, 8 - bnez $s6, .LBB4_214 - b .LBB4_210 + bnez $s6, .LBB4_213 + b .LBB4_209 .p2align 4, , 16 -.LBB4_215: # %._crit_edge32.loopexit.i - # in Loop: Header=BB4_204 Depth=1 +.LBB4_214: # %._crit_edge32.loopexit.i + # in Loop: Header=BB4_203 Depth=1 add.d $a2, $s8, $a4 add.d $a4, $t8, $a2 ld.d $s6, $sp, 96 # 8-byte Folded Reload - ld.d $ra, $sp, 272 # 8-byte Folded Reload -.LBB4_216: # %._crit_edge32.i - # in Loop: Header=BB4_204 Depth=1 + ld.d $ra, $sp, 280 # 8-byte Folded Reload +.LBB4_215: # %._crit_edge32.i + # in Loop: Header=BB4_203 Depth=1 add.w $t2, $t7, $t2 - beq $t7, $t0, .LBB4_223 -# %bb.217: # %.preheader1.preheader.i - # in Loop: Header=BB4_204 Depth=1 + beq $t7, $t0, .LBB4_222 +# %bb.216: # %.preheader1.preheader.i + # in Loop: Header=BB4_203 Depth=1 nor $t7, $t7, $zero - b .LBB4_219 + b .LBB4_218 .p2align 4, , 16 -.LBB4_218: # %._crit_edge38.i - # in Loop: Header=BB4_219 Depth=2 +.LBB4_217: # %._crit_edge38.i + # in Loop: Header=BB4_218 Depth=2 addi.d $t7, $t7, -1 addi.d $a4, $a4, 1 - beqz $t7, .LBB4_223 -.LBB4_219: # %.preheader1.i - # Parent Loop BB4_204 Depth=1 + beqz $t7, .LBB4_222 +.LBB4_218: # %.preheader1.i + # Parent Loop BB4_203 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB4_220 Depth 3 - # Child Loop BB4_222 Depth 3 + # Child Loop BB4_219 Depth 3 + # Child Loop BB4_221 Depth 3 move $t8, $s3 move $s0, $a6 - blez $s1, .LBB4_221 + blez $s1, .LBB4_220 .p2align 4, , 16 -.LBB4_220: # %.lr.ph35.i - # Parent Loop BB4_204 Depth=1 - # Parent Loop BB4_219 Depth=2 +.LBB4_219: # %.lr.ph35.i + # Parent Loop BB4_203 Depth=1 + # Parent Loop BB4_218 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $a2, $t8, 0 addi.d $a3, $a2, -1 @@ -3408,17 +3275,17 @@ partA__align: # @partA__align st.b $t1, $a2, -1 addi.d $s0, $s0, -1 addi.d $t8, $t8, 8 - bnez $s0, .LBB4_220 -.LBB4_221: # %.preheader.i - # in Loop: Header=BB4_219 Depth=2 + bnez $s0, .LBB4_219 +.LBB4_220: # %.preheader.i + # in Loop: Header=BB4_218 Depth=2 move $t8, $s6 move $s0, $s7 move $s4, $a7 - blez $fp, .LBB4_218 + blez $fp, .LBB4_217 .p2align 4, , 16 -.LBB4_222: # %.lr.ph37.i - # Parent Loop BB4_204 Depth=1 - # Parent Loop BB4_219 Depth=2 +.LBB4_221: # %.lr.ph37.i + # Parent Loop BB4_203 Depth=1 + # Parent Loop BB4_218 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $a2, $t8, 0 ld.d $a3, $s0, 0 @@ -3430,18 +3297,17 @@ partA__align: # @partA__align addi.d $s4, $s4, -1 addi.d $s0, $s0, 8 addi.d $t8, $t8, 8 - bnez $s4, .LBB4_222 - b .LBB4_218 + bnez $s4, .LBB4_221 + b .LBB4_217 .p2align 4, , 16 -.LBB4_223: # %._crit_edge41.i - # in Loop: Header=BB4_204 Depth=1 - ld.d $a2, $sp, 280 # 8-byte Folded Reload - beq $t4, $a2, .LBB4_226 -# %bb.224: # %._crit_edge41.i - # in Loop: Header=BB4_204 Depth=1 +.LBB4_222: # %._crit_edge41.i + # in Loop: Header=BB4_203 Depth=1 + beq $t4, $s5, .LBB4_225 +# %bb.223: # %._crit_edge41.i + # in Loop: Header=BB4_203 Depth=1 addi.w $a2, $a0, 0 - beq $t5, $a2, .LBB4_226 -# %bb.225: # in Loop: Header=BB4_204 Depth=1 + beq $t5, $a2, .LBB4_225 +# %bb.224: # in Loop: Header=BB4_203 Depth=1 slli.d $a2, $t4, 2 ld.d $a3, $sp, 192 # 8-byte Folded Reload ldx.w $a2, $a3, $a2 @@ -3452,27 +3318,28 @@ partA__align: # @partA__align slli.d $a2, $a2, 3 ld.d $a5, $sp, 256 # 8-byte Folded Reload ldx.d $a2, $a5, $a2 - add.w $a3, $a3, $s5 + ld.d $a5, $sp, 272 # 8-byte Folded Reload + add.w $a3, $a3, $a5 slli.d $a3, $a3, 2 fldx.s $fa0, $a2, $a3 ld.d $a2, $sp, 264 # 8-byte Folded Reload fld.s $fa1, $a2, 0 fadd.s $fa0, $fa0, $fa1 fst.s $fa0, $a2, 0 -.LBB4_226: # in Loop: Header=BB4_204 Depth=1 - blez $t4, .LBB4_286 -# %bb.227: # in Loop: Header=BB4_204 Depth=1 - blez $t5, .LBB4_286 -# %bb.228: # %.preheader5.i - # in Loop: Header=BB4_204 Depth=1 - blez $s1, .LBB4_231 -# %bb.229: # %.lr.ph44.i - # in Loop: Header=BB4_204 Depth=1 +.LBB4_225: # in Loop: Header=BB4_203 Depth=1 + blez $t4, .LBB4_285 +# %bb.226: # in Loop: Header=BB4_203 Depth=1 + blez $t5, .LBB4_285 +# %bb.227: # %.preheader5.i + # in Loop: Header=BB4_203 Depth=1 + blez $s1, .LBB4_230 +# %bb.228: # %.lr.ph44.i + # in Loop: Header=BB4_203 Depth=1 move $t4, $s2 move $t5, $s3 move $t6, $a6 .p2align 4, , 16 -.LBB4_230: # Parent Loop BB4_204 Depth=1 +.LBB4_229: # Parent Loop BB4_203 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a2, $t4, 0 ld.d $a3, $t5, 0 @@ -3483,17 +3350,17 @@ partA__align: # @partA__align addi.d $t6, $t6, -1 addi.d $t5, $t5, 8 addi.d $t4, $t4, 8 - bnez $t6, .LBB4_230 -.LBB4_231: # %.preheader4.i - # in Loop: Header=BB4_204 Depth=1 - blez $fp, .LBB4_203 -# %bb.232: # %.lr.ph46.i - # in Loop: Header=BB4_204 Depth=1 + bnez $t6, .LBB4_229 +.LBB4_230: # %.preheader4.i + # in Loop: Header=BB4_203 Depth=1 + blez $fp, .LBB4_202 +# %bb.231: # %.lr.ph46.i + # in Loop: Header=BB4_203 Depth=1 move $t4, $s6 move $t5, $s7 move $t6, $a7 .p2align 4, , 16 -.LBB4_233: # Parent Loop BB4_204 Depth=1 +.LBB4_232: # Parent Loop BB4_203 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a2, $t4, 0 ld.d $a3, $t5, 0 @@ -3504,9 +3371,9 @@ partA__align: # @partA__align addi.d $t6, $t6, -1 addi.d $t5, $t5, 8 addi.d $t4, $t4, 8 - bnez $t6, .LBB4_233 - b .LBB4_203 -.LBB4_234: # %vector.ph832 + bnez $t6, .LBB4_232 + b .LBB4_202 +.LBB4_233: # %vector.ph832 andi $a5, $a4, 12 move $a6, $a4 bstrins.d $a6, $zero, 3, 0 @@ -3517,7 +3384,7 @@ partA__align: # @partA__align xvrepli.b $xr1, 0 move $t2, $a6 .p2align 4, , 16 -.LBB4_235: # %vector.body835 +.LBB4_234: # %vector.body835 # =>This Inner Loop Header: Depth=1 xvld $xr2, $a7, -32 xvld $xr3, $a7, 0 @@ -3531,12 +3398,12 @@ partA__align: # @partA__align addi.d $a7, $a7, 64 addi.d $t0, $t0, 64 addi.d $t1, $t1, 64 - bnez $t2, .LBB4_235 -# %bb.236: # %middle.block842 - beq $a4, $a6, .LBB4_82 -# %bb.237: # %vec.epilog.iter.check846 - beqz $a5, .LBB4_310 -.LBB4_238: # %vec.epilog.ph848 + bnez $t2, .LBB4_234 +# %bb.235: # %middle.block842 + beq $a4, $a6, .LBB4_81 +# %bb.236: # %vec.epilog.iter.check846 + beqz $a5, .LBB4_309 +.LBB4_237: # %vec.epilog.ph848 move $a7, $a4 bstrins.d $a7, $zero, 1, 0 ori $t0, $zero, 1 @@ -3551,7 +3418,7 @@ partA__align: # @partA__align sub.d $a6, $a6, $a7 vrepli.b $vr1, 0 .p2align 4, , 16 -.LBB4_239: # %vec.epilog.vector.body851 +.LBB4_238: # %vec.epilog.vector.body851 # =>This Inner Loop Header: Depth=1 vld $vr2, $t0, 0 vfadd.s $vr2, $vr2, $vr0 @@ -3561,22 +3428,22 @@ partA__align: # @partA__align addi.d $t1, $t1, 16 addi.d $a6, $a6, 4 addi.d $t2, $t2, 16 - bnez $a6, .LBB4_239 -# %bb.240: # %vec.epilog.middle.block858 - bne $a4, $a7, .LBB4_80 - b .LBB4_82 -.LBB4_241: # %vector.ph921 + bnez $a6, .LBB4_238 +# %bb.239: # %vec.epilog.middle.block858 + bne $a4, $a7, .LBB4_79 + b .LBB4_81 +.LBB4_240: # %vector.ph921 andi $a6, $a5, 12 bstrpick.d $a4, $a5, 31, 4 slli.d $a4, $a4, 4 - pcalau12i $a7, %pc_hi20(.LCPI4_6) - xvld $xr0, $a7, %pc_lo12(.LCPI4_6) + pcalau12i $a7, %pc_hi20(.LCPI4_7) + xvld $xr0, $a7, %pc_lo12(.LCPI4_7) addi.d $a7, $a2, 32 xvrepli.b $xr1, -1 xvrepli.w $xr2, -9 move $t0, $a4 .p2align 4, , 16 -.LBB4_242: # %vector.body924 +.LBB4_241: # %vector.body924 # =>This Inner Loop Header: Depth=1 xvxor.v $xr3, $xr0, $xr1 xvsub.w $xr4, $xr2, $xr0 @@ -3585,15 +3452,15 @@ partA__align: # @partA__align xvaddi.wu $xr0, $xr0, 16 addi.d $t0, $t0, -16 addi.d $a7, $a7, 64 - bnez $t0, .LBB4_242 -# %bb.243: # %middle.block930 - beq $a3, $a4, .LBB4_250 -# %bb.244: # %vec.epilog.iter.check934 - beqz $a6, .LBB4_248 -.LBB4_245: # %vec.epilog.ph936 + bnez $t0, .LBB4_241 +# %bb.242: # %middle.block930 + beq $a3, $a4, .LBB4_249 +# %bb.243: # %vec.epilog.iter.check934 + beqz $a6, .LBB4_247 +.LBB4_244: # %vec.epilog.ph936 move $a6, $a4 - pcalau12i $a4, %pc_hi20(.LCPI4_7) - vld $vr0, $a4, %pc_lo12(.LCPI4_7) + pcalau12i $a4, %pc_hi20(.LCPI4_8) + vld $vr0, $a4, %pc_lo12(.LCPI4_8) bstrpick.d $a4, $a5, 31, 2 slli.d $a4, $a4, 2 vreplgr2vr.w $vr1, $a6 @@ -3602,126 +3469,126 @@ partA__align: # @partA__align alsl.d $a6, $a6, $a2, 2 vrepli.b $vr1, -1 .p2align 4, , 16 -.LBB4_246: # %vec.epilog.vector.body941 +.LBB4_245: # %vec.epilog.vector.body941 # =>This Inner Loop Header: Depth=1 vxor.v $vr2, $vr0, $vr1 vst $vr2, $a6, 0 vaddi.wu $vr0, $vr0, 4 addi.d $a5, $a5, 4 addi.d $a6, $a6, 16 - bnez $a5, .LBB4_246 -# %bb.247: # %vec.epilog.middle.block946 - beq $a3, $a4, .LBB4_250 -.LBB4_248: # %vec.epilog.scalar.ph933.preheader + bnez $a5, .LBB4_245 +# %bb.246: # %vec.epilog.middle.block946 + beq $a3, $a4, .LBB4_249 +.LBB4_247: # %vec.epilog.scalar.ph933.preheader alsl.d $a2, $a4, $a2, 2 nor $a5, $a4, $zero sub.d $a3, $a3, $a4 .p2align 4, , 16 -.LBB4_249: # %vec.epilog.scalar.ph933 +.LBB4_248: # %vec.epilog.scalar.ph933 # =>This Inner Loop Header: Depth=1 st.w $a5, $a2, 0 addi.d $a2, $a2, 4 addi.d $a3, $a3, -1 addi.d $a5, $a5, -1 - bnez $a3, .LBB4_249 -.LBB4_250: # %.preheader7.i369 - blez $s1, .LBB4_253 -# %bb.251: # %.lr.ph22.i428 + bnez $a3, .LBB4_248 +.LBB4_249: # %.preheader7.i369 + blez $s1, .LBB4_252 +# %bb.250: # %.lr.ph22.i428 move $a2, $s3 move $a3, $s1 .p2align 4, , 16 -.LBB4_252: # =>This Inner Loop Header: Depth=1 +.LBB4_251: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a2, 0 add.d $a5, $a4, $a1 st.d $a5, $a2, 0 stx.b $zero, $a4, $a1 addi.d $a3, $a3, -1 addi.d $a2, $a2, 8 - bnez $a3, .LBB4_252 -.LBB4_253: # %.preheader6.i370 - blez $fp, .LBB4_256 -# %bb.254: # %.lr.ph24.i423 + bnez $a3, .LBB4_251 +.LBB4_252: # %.preheader6.i370 + blez $fp, .LBB4_255 +# %bb.253: # %.lr.ph24.i423 move $a2, $s7 move $a3, $fp .p2align 4, , 16 -.LBB4_255: # =>This Inner Loop Header: Depth=1 +.LBB4_254: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a2, 0 add.d $a5, $a4, $a1 st.d $a5, $a2, 0 stx.b $zero, $a4, $a1 addi.d $a3, $a3, -1 addi.d $a2, $a2, 8 - bnez $a3, .LBB4_255 -.LBB4_256: # %._crit_edge.i371 - bltz $a1, .LBB4_286 -# %bb.257: # %.lr.ph53.i372 + bnez $a3, .LBB4_254 +.LBB4_255: # %._crit_edge.i371 + bltz $a1, .LBB4_285 +# %bb.256: # %.lr.ph53.i372 move $a3, $zero bstrpick.d $a2, $s1, 31, 0 bstrpick.d $a4, $fp, 31, 0 addi.w $a5, $zero, -1 ori $a6, $zero, 45 - b .LBB4_259 + b .LBB4_258 .p2align 4, , 16 -.LBB4_258: # %._crit_edge47.i398 - # in Loop: Header=BB4_259 Depth=1 +.LBB4_257: # %._crit_edge47.i398 + # in Loop: Header=BB4_258 Depth=1 addi.w $a3, $a3, 2 move $s8, $a7 - blt $a1, $a3, .LBB4_286 -.LBB4_259: # =>This Loop Header: Depth=1 - # Child Loop BB4_266 Depth 2 - # Child Loop BB4_267 Depth 3 - # Child Loop BB4_269 Depth 3 - # Child Loop BB4_274 Depth 2 - # Child Loop BB4_275 Depth 3 - # Child Loop BB4_277 Depth 3 - # Child Loop BB4_282 Depth 2 - # Child Loop BB4_285 Depth 2 + blt $a1, $a3, .LBB4_285 +.LBB4_258: # =>This Loop Header: Depth=1 + # Child Loop BB4_265 Depth 2 + # Child Loop BB4_266 Depth 3 + # Child Loop BB4_268 Depth 3 + # Child Loop BB4_273 Depth 2 + # Child Loop BB4_274 Depth 3 + # Child Loop BB4_276 Depth 3 + # Child Loop BB4_281 Depth 2 + # Child Loop BB4_284 Depth 2 addi.w $t0, $s8, 0 slli.d $a7, $t0, 3 - ldx.d $a7, $s5, $a7 + ldx.d $a7, $ra, $a7 addi.w $t1, $a0, 0 slli.d $t2, $t1, 2 ldx.w $t2, $a7, $t2 - bltz $t2, .LBB4_262 -# %bb.260: # in Loop: Header=BB4_259 Depth=1 - beqz $t2, .LBB4_263 -# %bb.261: # in Loop: Header=BB4_259 Depth=1 + bltz $t2, .LBB4_261 +# %bb.259: # in Loop: Header=BB4_258 Depth=1 + beqz $t2, .LBB4_262 +# %bb.260: # in Loop: Header=BB4_258 Depth=1 sub.w $a7, $s8, $t2 - b .LBB4_264 + b .LBB4_263 .p2align 4, , 16 -.LBB4_262: # in Loop: Header=BB4_259 Depth=1 +.LBB4_261: # in Loop: Header=BB4_258 Depth=1 addi.w $a7, $s8, -1 nor $t3, $a7, $zero add.w $t4, $s8, $t3 - bnez $t4, .LBB4_266 - b .LBB4_271 + bnez $t4, .LBB4_265 + b .LBB4_270 .p2align 4, , 16 -.LBB4_263: # in Loop: Header=BB4_259 Depth=1 +.LBB4_262: # in Loop: Header=BB4_258 Depth=1 addi.w $a7, $s8, -1 -.LBB4_264: # in Loop: Header=BB4_259 Depth=1 +.LBB4_263: # in Loop: Header=BB4_258 Depth=1 move $t2, $a5 nor $t3, $a7, $zero add.w $t4, $s8, $t3 - bnez $t4, .LBB4_266 - b .LBB4_271 + bnez $t4, .LBB4_265 + b .LBB4_270 .p2align 4, , 16 -.LBB4_265: # %._crit_edge29.i384 - # in Loop: Header=BB4_266 Depth=2 +.LBB4_264: # %._crit_edge29.i384 + # in Loop: Header=BB4_265 Depth=2 addi.d $t4, $t4, -1 - beqz $t4, .LBB4_270 -.LBB4_266: # %.preheader3.i381 - # Parent Loop BB4_259 Depth=1 + beqz $t4, .LBB4_269 +.LBB4_265: # %.preheader3.i381 + # Parent Loop BB4_258 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB4_267 Depth 3 - # Child Loop BB4_269 Depth 3 + # Child Loop BB4_266 Depth 3 + # Child Loop BB4_268 Depth 3 move $t5, $s2 move $t6, $s3 move $t7, $a2 - blez $s1, .LBB4_268 + blez $s1, .LBB4_267 .p2align 4, , 16 -.LBB4_267: # %.lr.ph26.i419 - # Parent Loop BB4_259 Depth=1 - # Parent Loop BB4_266 Depth=2 +.LBB4_266: # %.lr.ph26.i419 + # Parent Loop BB4_258 Depth=1 + # Parent Loop BB4_265 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $t8, $t5, 0 ld.d $s0, $t6, 0 @@ -3733,16 +3600,16 @@ partA__align: # @partA__align addi.d $t7, $t7, -1 addi.d $t6, $t6, 8 addi.d $t5, $t5, 8 - bnez $t7, .LBB4_267 -.LBB4_268: # %.preheader2.i383 - # in Loop: Header=BB4_266 Depth=2 + bnez $t7, .LBB4_266 +.LBB4_267: # %.preheader2.i383 + # in Loop: Header=BB4_265 Depth=2 move $t5, $s7 move $t6, $a4 - blez $fp, .LBB4_265 + blez $fp, .LBB4_264 .p2align 4, , 16 -.LBB4_269: # %.lr.ph28.i415 - # Parent Loop BB4_259 Depth=1 - # Parent Loop BB4_266 Depth=2 +.LBB4_268: # %.lr.ph28.i415 + # Parent Loop BB4_258 Depth=1 + # Parent Loop BB4_265 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $t7, $t5, 0 addi.d $t8, $t7, -1 @@ -3750,39 +3617,39 @@ partA__align: # @partA__align st.b $a6, $t7, -1 addi.d $t6, $t6, -1 addi.d $t5, $t5, 8 - bnez $t6, .LBB4_269 - b .LBB4_265 + bnez $t6, .LBB4_268 + b .LBB4_264 .p2align 4, , 16 -.LBB4_270: # %._crit_edge32.loopexit.i386 - # in Loop: Header=BB4_259 Depth=1 +.LBB4_269: # %._crit_edge32.loopexit.i386 + # in Loop: Header=BB4_258 Depth=1 add.d $a3, $s8, $a3 add.d $a3, $t3, $a3 -.LBB4_271: # %._crit_edge32.i387 - # in Loop: Header=BB4_259 Depth=1 +.LBB4_270: # %._crit_edge32.i387 + # in Loop: Header=BB4_258 Depth=1 add.w $a0, $t2, $a0 - beq $t2, $a5, .LBB4_278 -# %bb.272: # %.preheader1.preheader.i388 - # in Loop: Header=BB4_259 Depth=1 + beq $t2, $a5, .LBB4_277 +# %bb.271: # %.preheader1.preheader.i388 + # in Loop: Header=BB4_258 Depth=1 nor $t2, $t2, $zero - b .LBB4_274 + b .LBB4_273 .p2align 4, , 16 -.LBB4_273: # %._crit_edge38.i392 - # in Loop: Header=BB4_274 Depth=2 +.LBB4_272: # %._crit_edge38.i392 + # in Loop: Header=BB4_273 Depth=2 addi.d $t2, $t2, -1 addi.d $a3, $a3, 1 - beqz $t2, .LBB4_278 -.LBB4_274: # %.preheader1.i389 - # Parent Loop BB4_259 Depth=1 + beqz $t2, .LBB4_277 +.LBB4_273: # %.preheader1.i389 + # Parent Loop BB4_258 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB4_275 Depth 3 - # Child Loop BB4_277 Depth 3 + # Child Loop BB4_274 Depth 3 + # Child Loop BB4_276 Depth 3 move $t3, $s3 move $t4, $a2 - blez $s1, .LBB4_276 + blez $s1, .LBB4_275 .p2align 4, , 16 -.LBB4_275: # %.lr.ph35.i411 - # Parent Loop BB4_259 Depth=1 - # Parent Loop BB4_274 Depth=2 +.LBB4_274: # %.lr.ph35.i411 + # Parent Loop BB4_258 Depth=1 + # Parent Loop BB4_273 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $t5, $t3, 0 addi.d $t6, $t5, -1 @@ -3790,17 +3657,17 @@ partA__align: # @partA__align st.b $a6, $t5, -1 addi.d $t4, $t4, -1 addi.d $t3, $t3, 8 - bnez $t4, .LBB4_275 -.LBB4_276: # %.preheader.i391 - # in Loop: Header=BB4_274 Depth=2 + bnez $t4, .LBB4_274 +.LBB4_275: # %.preheader.i391 + # in Loop: Header=BB4_273 Depth=2 move $t3, $s6 move $t4, $s7 move $t5, $a4 - blez $fp, .LBB4_273 + blez $fp, .LBB4_272 .p2align 4, , 16 -.LBB4_277: # %.lr.ph37.i407 - # Parent Loop BB4_259 Depth=1 - # Parent Loop BB4_274 Depth=2 +.LBB4_276: # %.lr.ph37.i407 + # Parent Loop BB4_258 Depth=1 + # Parent Loop BB4_273 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $t6, $t3, 0 ld.d $t7, $t4, 0 @@ -3812,25 +3679,25 @@ partA__align: # @partA__align addi.d $t5, $t5, -1 addi.d $t4, $t4, 8 addi.d $t3, $t3, 8 - bnez $t5, .LBB4_277 - b .LBB4_273 + bnez $t5, .LBB4_276 + b .LBB4_272 .p2align 4, , 16 -.LBB4_278: # %._crit_edge41.i394 - # in Loop: Header=BB4_259 Depth=1 - blez $t0, .LBB4_286 -# %bb.279: # %._crit_edge41.i394 - # in Loop: Header=BB4_259 Depth=1 - blez $t1, .LBB4_286 -# %bb.280: # %.preheader5.i396 - # in Loop: Header=BB4_259 Depth=1 - blez $s1, .LBB4_283 -# %bb.281: # %.lr.ph44.i403 - # in Loop: Header=BB4_259 Depth=1 +.LBB4_277: # %._crit_edge41.i394 + # in Loop: Header=BB4_258 Depth=1 + blez $t0, .LBB4_285 +# %bb.278: # %._crit_edge41.i394 + # in Loop: Header=BB4_258 Depth=1 + blez $t1, .LBB4_285 +# %bb.279: # %.preheader5.i396 + # in Loop: Header=BB4_258 Depth=1 + blez $s1, .LBB4_282 +# %bb.280: # %.lr.ph44.i403 + # in Loop: Header=BB4_258 Depth=1 move $t0, $s2 move $t1, $s3 move $t2, $a2 .p2align 4, , 16 -.LBB4_282: # Parent Loop BB4_259 Depth=1 +.LBB4_281: # Parent Loop BB4_258 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $t3, $t0, 0 ld.d $t4, $t1, 0 @@ -3841,17 +3708,17 @@ partA__align: # @partA__align addi.d $t2, $t2, -1 addi.d $t1, $t1, 8 addi.d $t0, $t0, 8 - bnez $t2, .LBB4_282 -.LBB4_283: # %.preheader4.i397 - # in Loop: Header=BB4_259 Depth=1 - blez $fp, .LBB4_258 -# %bb.284: # %.lr.ph46.i399 - # in Loop: Header=BB4_259 Depth=1 + bnez $t2, .LBB4_281 +.LBB4_282: # %.preheader4.i397 + # in Loop: Header=BB4_258 Depth=1 + blez $fp, .LBB4_257 +# %bb.283: # %.lr.ph46.i399 + # in Loop: Header=BB4_258 Depth=1 move $t0, $s6 move $t1, $s7 move $t2, $a4 .p2align 4, , 16 -.LBB4_285: # Parent Loop BB4_259 Depth=1 +.LBB4_284: # Parent Loop BB4_258 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $t3, $t0, 0 ld.d $t4, $t1, 0 @@ -3862,26 +3729,26 @@ partA__align: # @partA__align addi.d $t2, $t2, -1 addi.d $t1, $t1, 8 addi.d $t0, $t0, 8 - bnez $t2, .LBB4_285 - b .LBB4_258 -.LBB4_286: # %Atracking_localhom.exit + bnez $t2, .LBB4_284 + b .LBB4_257 +.LBB4_285: # %Atracking_localhom.exit ld.d $a0, $s3, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 addi.w $a3, $a0, 0 lu12i.w $a4, 1220 ld.d $a2, $sp, 24 # 8-byte Folded Reload - blt $a2, $a3, .LBB4_295 -# %bb.287: # %Atracking_localhom.exit + blt $a2, $a3, .LBB4_294 +# %bb.286: # %Atracking_localhom.exit ori $a0, $a4, 2881 - bge $a3, $a0, .LBB4_295 -# %bb.288: - blez $s1, .LBB4_291 -.LBB4_289: # %.lr.ph522 + bge $a3, $a0, .LBB4_294 +# %bb.287: + blez $s1, .LBB4_290 +.LBB4_288: # %.lr.ph522 ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $s0, $a0, %pc_lo12(partA__align.mseq1) .p2align 4, , 16 -.LBB4_290: # =>This Inner Loop Header: Depth=1 +.LBB4_289: # =>This Inner Loop Header: Depth=1 ld.d $a0, $s2, 0 ld.d $a1, $s0, 0 pcaddu18i $ra, %call36(strcpy) @@ -3889,14 +3756,14 @@ partA__align: # @partA__align addi.d $s0, $s0, 8 addi.d $s1, $s1, -1 addi.d $s2, $s2, 8 - bnez $s1, .LBB4_290 -.LBB4_291: # %.preheader - blez $fp, .LBB4_294 -# %bb.292: # %.lr.ph524 + bnez $s1, .LBB4_289 +.LBB4_290: # %.preheader + blez $fp, .LBB4_293 +# %bb.291: # %.lr.ph524 ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $s0, $a0, %pc_lo12(partA__align.mseq2) .p2align 4, , 16 -.LBB4_293: # =>This Inner Loop Header: Depth=1 +.LBB4_292: # =>This Inner Loop Header: Depth=1 ld.d $a0, $s6, 0 ld.d $a1, $s0, 0 pcaddu18i $ra, %call36(strcpy) @@ -3904,8 +3771,8 @@ partA__align: # @partA__align addi.d $s0, $s0, 8 addi.d $fp, $fp, -1 addi.d $s6, $s6, 8 - bnez $fp, .LBB4_293 -.LBB4_294: # %._crit_edge525 + bnez $fp, .LBB4_292 +.LBB4_293: # %._crit_edge525 fmov.s $fa0, $fs0 fld.d $fs0, $sp, 288 # 8-byte Folded Reload ld.d $s8, $sp, 296 # 8-byte Folded Reload @@ -3921,7 +3788,7 @@ partA__align: # @partA__align ld.d $ra, $sp, 376 # 8-byte Folded Reload addi.d $sp, $sp, 384 ret -.LBB4_295: +.LBB4_294: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 @@ -3934,9 +3801,9 @@ partA__align: # @partA__align addi.d $a0, $a0, %pc_lo12(.L.str.7) pcaddu18i $ra, %call36(ErrorExit) jirl $ra, $ra, 0 - bgtz $s1, .LBB4_289 - b .LBB4_291 -.LBB4_296: # %vector.ph738 + bgtz $s1, .LBB4_288 + b .LBB4_290 +.LBB4_295: # %vector.ph738 andi $a4, $a3, 12 move $a5, $a3 xvldrepl.w $xr0, $a0, 0 @@ -3946,7 +3813,7 @@ partA__align: # @partA__align addi.d $a7, $a7, 36 move $t0, $a5 .p2align 4, , 16 -.LBB4_297: # %vector.body741 +.LBB4_296: # %vector.body741 # =>This Inner Loop Header: Depth=1 xvld $xr1, $a6, -32 xvld $xr2, $a6, 0 @@ -3961,12 +3828,12 @@ partA__align: # @partA__align addi.d $a6, $a6, 64 addi.d $t0, $t0, -16 addi.d $a7, $a7, 64 - bnez $t0, .LBB4_297 -# %bb.298: # %middle.block750 - beq $a3, $a5, .LBB4_60 -# %bb.299: # %vec.epilog.iter.check - beqz $a4, .LBB4_311 -.LBB4_300: # %vec.epilog.ph + bnez $t0, .LBB4_296 +# %bb.297: # %middle.block750 + beq $a3, $a5, .LBB4_59 +# %bb.298: # %vec.epilog.iter.check + beqz $a4, .LBB4_310 +.LBB4_299: # %vec.epilog.ph move $a6, $a3 bstrins.d $a6, $zero, 1, 0 ori $a7, $zero, 1 @@ -3979,7 +3846,7 @@ partA__align: # @partA__align addi.d $t0, $t0, 4 sub.d $a5, $a5, $a6 .p2align 4, , 16 -.LBB4_301: # %vec.epilog.vector.body +.LBB4_300: # %vec.epilog.vector.body # =>This Inner Loop Header: Depth=1 vld $vr1, $a7, 0 vld $vr2, $t0, 0 @@ -3989,11 +3856,11 @@ partA__align: # @partA__align addi.d $a7, $a7, 16 addi.d $a5, $a5, 4 addi.d $t0, $t0, 16 - bnez $a5, .LBB4_301 -# %bb.302: # %vec.epilog.middle.block - bne $a3, $a6, .LBB4_58 - b .LBB4_60 -.LBB4_303: # %vector.ph781 + bnez $a5, .LBB4_300 +# %bb.301: # %vec.epilog.middle.block + bne $a3, $a6, .LBB4_57 + b .LBB4_59 +.LBB4_302: # %vector.ph781 andi $a4, $a3, 12 move $a5, $a3 xvldrepl.w $xr0, $a0, 0 @@ -4002,7 +3869,7 @@ partA__align: # @partA__align addi.d $a7, $t3, 36 move $t0, $a5 .p2align 4, , 16 -.LBB4_304: # %vector.body784 +.LBB4_303: # %vector.body784 # =>This Inner Loop Header: Depth=1 xvld $xr1, $a6, -32 xvld $xr2, $a6, 0 @@ -4017,12 +3884,12 @@ partA__align: # @partA__align addi.d $a6, $a6, 64 addi.d $t0, $t0, -16 addi.d $a7, $a7, 64 - bnez $t0, .LBB4_304 -# %bb.305: # %middle.block793 - beq $a3, $a5, .LBB4_79 -# %bb.306: # %vec.epilog.iter.check797 - beqz $a4, .LBB4_312 -.LBB4_307: # %vec.epilog.ph799 + bnez $t0, .LBB4_303 +# %bb.304: # %middle.block793 + beq $a3, $a5, .LBB4_78 +# %bb.305: # %vec.epilog.iter.check797 + beqz $a4, .LBB4_311 +.LBB4_306: # %vec.epilog.ph799 move $a6, $a3 bstrins.d $a6, $zero, 1, 0 ori $a7, $zero, 1 @@ -4034,7 +3901,7 @@ partA__align: # @partA__align addi.d $t0, $t0, 4 sub.d $a5, $a5, $a6 .p2align 4, , 16 -.LBB4_308: # %vec.epilog.vector.body802 +.LBB4_307: # %vec.epilog.vector.body802 # =>This Inner Loop Header: Depth=1 vld $vr1, $a7, 0 vld $vr2, $t0, 0 @@ -4044,19 +3911,19 @@ partA__align: # @partA__align addi.d $a7, $a7, 16 addi.d $a5, $a5, 4 addi.d $t0, $t0, 16 - bnez $a5, .LBB4_308 -# %bb.309: # %vec.epilog.middle.block810 - bne $a3, $a6, .LBB4_62 + bnez $a5, .LBB4_307 +# %bb.308: # %vec.epilog.middle.block810 + bne $a3, $a6, .LBB4_61 + b .LBB4_78 +.LBB4_309: + addi.d $a5, $a6, 1 b .LBB4_79 .LBB4_310: - addi.d $a5, $a6, 1 - b .LBB4_80 -.LBB4_311: addi.d $a4, $a5, 1 - b .LBB4_58 -.LBB4_312: + b .LBB4_57 +.LBB4_311: addi.d $a4, $a5, 1 - b .LBB4_62 + b .LBB4_61 .Lfunc_end4: .size partA__align, .Lfunc_end4-partA__align # -- End function diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s index 755e4e3..411d1c6 100644 --- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s +++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s @@ -812,33 +812,24 @@ start_pass_fdctmgr: # @start_pass_fdctmgr ffint.d.w $fa5, $fa1 vextrins.d $vr5, $vr4, 16 vreplvei.d $vr1, $vr0, 0 + vfmul.d $vr4, $vr1, $vr5 + xvori.b $xr5, $xr0, 0 fst.d $fs0, $sp, 184 fst.d $fs1, $sp, 176 fst.d $fs2, $sp, 168 fst.d $fa3, $sp, 160 xvld $xr3, $sp, 160 - vfmul.d $vr4, $vr1, $vr5 - xvori.b $xr5, $xr0, 0 xvinsve0.d $xr5, $xr2, 1 xvpermi.q $xr5, $xr4, 2 + ld.hu $a3, $a2, 0 xvfmul.d $xr2, $xr5, $xr3 xvfmul.d $xr2, $xr2, $xr6 xvfrecip.d $xr2, $xr2 - xvpickve.d $xr3, $xr2, 1 - fcvt.s.d $fa3, $fa3 - xvpickve.d $xr4, $xr2, 0 - fcvt.s.d $fa4, $fa4 - vextrins.w $vr4, $vr3, 16 - xvpickve.d $xr3, $xr2, 2 - ld.hu $a3, $a2, 0 - fcvt.s.d $fa3, $fa3 - vextrins.w $vr4, $vr3, 32 - xvpickve.d $xr2, $xr2, 3 movgr2fr.w $fa3, $a3 ld.hu $a3, $a2, 2 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr4, $vr2, 48 - vst $vr4, $a0, -16 + xvpermi.q $xr4, $xr2, 1 + vfcvt.s.d $vr2, $vr4, $vr2 + vst $vr2, $a0, -16 movgr2fr.w $fa2, $a3 ld.w $a3, $a2, 4 ffint.d.w $fa3, $fa3 @@ -865,18 +856,9 @@ start_pass_fdctmgr: # @start_pass_fdctmgr xvfmul.d $xr0, $xr0, $xr3 xvfmul.d $xr0, $xr0, $xr6 xvfrecip.d $xr0, $xr0 - xvpickve.d $xr1, $xr0, 1 - fcvt.s.d $fa1, $fa1 - xvpickve.d $xr2, $xr0, 0 - fcvt.s.d $fa2, $fa2 - vextrins.w $vr2, $vr1, 16 - xvpickve.d $xr1, $xr0, 2 - fcvt.s.d $fa1, $fa1 - vextrins.w $vr2, $vr1, 32 - xvpickve.d $xr0, $xr0, 3 - fcvt.s.d $fa0, $fa0 - vextrins.w $vr2, $vr0, 48 - vst $vr2, $a0, 0 + xvpermi.q $xr1, $xr0, 1 + vfcvt.s.d $vr0, $vr1, $vr0 + vst $vr0, $a0, 0 addi.d $a1, $a1, 8 addi.d $a2, $a2, 16 addi.d $a0, $a0, 32