@@ -452,7 +452,7 @@ define double @v_uitofp_i8_to_f64(i8 %arg0) nounwind {
452452define amdgpu_kernel void @load_i8_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
453453; SI-LABEL: load_i8_to_f32:
454454; SI: ; %bb.0:
455- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
455+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
456456; SI-NEXT: s_mov_b32 s6, 0
457457; SI-NEXT: s_mov_b32 s7, 0xf000
458458; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -468,7 +468,7 @@ define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr add
468468;
469469; VI-LABEL: load_i8_to_f32:
470470; VI: ; %bb.0:
471- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
471+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
472472; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
473473; VI-NEXT: s_waitcnt lgkmcnt(0)
474474; VI-NEXT: v_mov_b32_e32 v1, s2
@@ -493,7 +493,7 @@ define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr add
493493define amdgpu_kernel void @load_v2i8_to_v2f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
494494; SI-LABEL: load_v2i8_to_v2f32:
495495; SI: ; %bb.0:
496- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
496+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
497497; SI-NEXT: v_lshlrev_b32_e32 v0, 1, v0
498498; SI-NEXT: v_mov_b32_e32 v1, 0
499499; SI-NEXT: s_mov_b32 s6, 0
@@ -513,7 +513,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr
513513;
514514; VI-LABEL: load_v2i8_to_v2f32:
515515; VI: ; %bb.0:
516- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
516+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
517517; VI-NEXT: v_lshlrev_b32_e32 v2, 1, v0
518518; VI-NEXT: s_waitcnt lgkmcnt(0)
519519; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -539,7 +539,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr
539539define amdgpu_kernel void @load_v3i8_to_v3f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
540540; SI-LABEL: load_v3i8_to_v3f32:
541541; SI: ; %bb.0:
542- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
542+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
543543; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
544544; SI-NEXT: v_mov_b32_e32 v1, 0
545545; SI-NEXT: s_mov_b32 s6, 0
@@ -562,7 +562,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr
562562;
563563; VI-LABEL: load_v3i8_to_v3f32:
564564; VI: ; %bb.0:
565- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
565+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
566566; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
567567; VI-NEXT: s_waitcnt lgkmcnt(0)
568568; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -589,7 +589,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr
589589define amdgpu_kernel void @load_v4i8_to_v4f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
590590; SI-LABEL: load_v4i8_to_v4f32:
591591; SI: ; %bb.0:
592- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
592+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
593593; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
594594; SI-NEXT: v_mov_b32_e32 v1, 0
595595; SI-NEXT: s_mov_b32 s6, 0
@@ -612,7 +612,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr
612612;
613613; VI-LABEL: load_v4i8_to_v4f32:
614614; VI: ; %bb.0:
615- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
615+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
616616; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
617617; VI-NEXT: s_waitcnt lgkmcnt(0)
618618; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -644,7 +644,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr
644644define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
645645; SI-LABEL: load_v4i8_to_v4f32_unaligned:
646646; SI: ; %bb.0:
647- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
647+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
648648; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
649649; SI-NEXT: v_mov_b32_e32 v1, 0
650650; SI-NEXT: s_mov_b32 s6, 0
@@ -679,7 +679,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias
679679;
680680; VI-LABEL: load_v4i8_to_v4f32_unaligned:
681681; VI: ; %bb.0:
682- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
682+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
683683; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
684684; VI-NEXT: s_waitcnt lgkmcnt(0)
685685; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -725,14 +725,14 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias
725725define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %out2 , ptr addrspace (1 ) noalias %in ) nounwind {
726726; SI-LABEL: load_v4i8_to_v4f32_2_uses:
727727; SI: ; %bb.0:
728- ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
728+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
729729; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
730730; SI-NEXT: v_mov_b32_e32 v1, 0
731731; SI-NEXT: s_mov_b32 s6, 0
732732; SI-NEXT: s_mov_b32 s7, 0xf000
733733; SI-NEXT: s_waitcnt lgkmcnt(0)
734734; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
735- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
735+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
736736; SI-NEXT: s_mov_b32 s6, -1
737737; SI-NEXT: s_waitcnt lgkmcnt(0)
738738; SI-NEXT: s_mov_b64 s[4:5], s[0:1]
@@ -769,17 +769,17 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
769769;
770770; VI-LABEL: load_v4i8_to_v4f32_2_uses:
771771; VI: ; %bb.0:
772- ; VI-NEXT: s_load_dwordx2 s[2:3 ], s[0:1 ], 0x34
772+ ; VI-NEXT: s_load_dwordx2 s[0:1 ], s[2:3 ], 0x34
773773; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
774774; VI-NEXT: v_mov_b32_e32 v6, 9
775775; VI-NEXT: v_mov_b32_e32 v7, 8
776776; VI-NEXT: s_waitcnt lgkmcnt(0)
777- ; VI-NEXT: v_mov_b32_e32 v0, s2
778- ; VI-NEXT: v_mov_b32_e32 v1, s3
777+ ; VI-NEXT: v_mov_b32_e32 v0, s0
778+ ; VI-NEXT: v_mov_b32_e32 v1, s1
779779; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
780780; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
781781; VI-NEXT: flat_load_dword v1, v[0:1]
782- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
782+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
783783; VI-NEXT: v_mov_b32_e32 v2, 0xff
784784; VI-NEXT: s_waitcnt lgkmcnt(0)
785785; VI-NEXT: v_mov_b32_e32 v5, s1
@@ -821,7 +821,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
821821define amdgpu_kernel void @load_v7i8_to_v7f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
822822; SI-LABEL: load_v7i8_to_v7f32:
823823; SI: ; %bb.0:
824- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
824+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
825825; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
826826; SI-NEXT: v_mov_b32_e32 v1, 0
827827; SI-NEXT: s_mov_b32 s6, 0
@@ -858,7 +858,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
858858;
859859; VI-LABEL: load_v7i8_to_v7f32:
860860; VI: ; %bb.0:
861- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
861+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
862862; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
863863; VI-NEXT: s_waitcnt lgkmcnt(0)
864864; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -918,7 +918,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
918918define amdgpu_kernel void @load_v8i8_to_v8f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
919919; SI-LABEL: load_v8i8_to_v8f32:
920920; SI: ; %bb.0:
921- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
921+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
922922; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
923923; SI-NEXT: v_mov_b32_e32 v1, 0
924924; SI-NEXT: s_mov_b32 s6, 0
@@ -949,7 +949,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr
949949;
950950; VI-LABEL: load_v8i8_to_v8f32:
951951; VI: ; %bb.0:
952- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
952+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
953953; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
954954; VI-NEXT: s_waitcnt lgkmcnt(0)
955955; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -986,7 +986,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr
986986define amdgpu_kernel void @i8_zext_inreg_i32_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
987987; SI-LABEL: i8_zext_inreg_i32_to_f32:
988988; SI: ; %bb.0:
989- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
989+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
990990; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
991991; SI-NEXT: v_mov_b32_e32 v1, 0
992992; SI-NEXT: s_mov_b32 s6, 0
@@ -1005,7 +1005,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %ou
10051005;
10061006; VI-LABEL: i8_zext_inreg_i32_to_f32:
10071007; VI: ; %bb.0:
1008- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1008+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
10091009; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
10101010; VI-NEXT: s_waitcnt lgkmcnt(0)
10111011; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1033,7 +1033,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %ou
10331033define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
10341034; SI-LABEL: i8_zext_inreg_hi1_to_f32:
10351035; SI: ; %bb.0:
1036- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1036+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
10371037; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
10381038; SI-NEXT: v_mov_b32_e32 v1, 0
10391039; SI-NEXT: s_mov_b32 s6, 0
@@ -1051,7 +1051,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %ou
10511051;
10521052; VI-LABEL: i8_zext_inreg_hi1_to_f32:
10531053; VI: ; %bb.0:
1054- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1054+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
10551055; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
10561056; VI-NEXT: s_waitcnt lgkmcnt(0)
10571057; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1080,7 +1080,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %ou
10801080define amdgpu_kernel void @i8_zext_i32_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
10811081; SI-LABEL: i8_zext_i32_to_f32:
10821082; SI: ; %bb.0:
1083- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1083+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
10841084; SI-NEXT: s_mov_b32 s6, 0
10851085; SI-NEXT: s_mov_b32 s7, 0xf000
10861086; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -1096,7 +1096,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr
10961096;
10971097; VI-LABEL: i8_zext_i32_to_f32:
10981098; VI: ; %bb.0:
1099- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1099+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
11001100; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
11011101; VI-NEXT: s_waitcnt lgkmcnt(0)
11021102; VI-NEXT: v_mov_b32_e32 v1, s2
@@ -1122,7 +1122,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr
11221122define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
11231123; SI-LABEL: v4i8_zext_v4i32_to_v4f32:
11241124; SI: ; %bb.0:
1125- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1125+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
11261126; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
11271127; SI-NEXT: v_mov_b32_e32 v1, 0
11281128; SI-NEXT: s_mov_b32 s6, 0
@@ -1157,7 +1157,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %ou
11571157;
11581158; VI-LABEL: v4i8_zext_v4i32_to_v4f32:
11591159; VI: ; %bb.0:
1160- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1160+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
11611161; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
11621162; VI-NEXT: s_waitcnt lgkmcnt(0)
11631163; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1204,7 +1204,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %ou
12041204define amdgpu_kernel void @extract_byte0_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
12051205; SI-LABEL: extract_byte0_to_f32:
12061206; SI: ; %bb.0:
1207- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1207+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
12081208; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12091209; SI-NEXT: v_mov_b32_e32 v1, 0
12101210; SI-NEXT: s_mov_b32 s6, 0
@@ -1221,7 +1221,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, p
12211221;
12221222; VI-LABEL: extract_byte0_to_f32:
12231223; VI: ; %bb.0:
1224- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1224+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
12251225; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
12261226; VI-NEXT: s_waitcnt lgkmcnt(0)
12271227; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1247,7 +1247,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, p
12471247define amdgpu_kernel void @extract_byte1_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
12481248; SI-LABEL: extract_byte1_to_f32:
12491249; SI: ; %bb.0:
1250- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1250+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
12511251; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12521252; SI-NEXT: v_mov_b32_e32 v1, 0
12531253; SI-NEXT: s_mov_b32 s6, 0
@@ -1265,7 +1265,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, p
12651265;
12661266; VI-LABEL: extract_byte1_to_f32:
12671267; VI: ; %bb.0:
1268- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1268+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
12691269; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
12701270; VI-NEXT: s_waitcnt lgkmcnt(0)
12711271; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1292,7 +1292,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, p
12921292define amdgpu_kernel void @extract_byte2_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
12931293; SI-LABEL: extract_byte2_to_f32:
12941294; SI: ; %bb.0:
1295- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1295+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
12961296; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12971297; SI-NEXT: v_mov_b32_e32 v1, 0
12981298; SI-NEXT: s_mov_b32 s6, 0
@@ -1310,7 +1310,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, p
13101310;
13111311; VI-LABEL: extract_byte2_to_f32:
13121312; VI: ; %bb.0:
1313- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1313+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
13141314; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
13151315; VI-NEXT: s_waitcnt lgkmcnt(0)
13161316; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1337,7 +1337,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, p
13371337define amdgpu_kernel void @extract_byte3_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
13381338; SI-LABEL: extract_byte3_to_f32:
13391339; SI: ; %bb.0:
1340- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1340+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
13411341; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
13421342; SI-NEXT: v_mov_b32_e32 v1, 0
13431343; SI-NEXT: s_mov_b32 s6, 0
@@ -1354,7 +1354,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, p
13541354;
13551355; VI-LABEL: extract_byte3_to_f32:
13561356; VI: ; %bb.0:
1357- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1357+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
13581358; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
13591359; VI-NEXT: s_waitcnt lgkmcnt(0)
13601360; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1381,7 +1381,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, p
13811381define amdgpu_kernel void @cvt_ubyte0_or_multiuse (ptr addrspace (1 ) %in , ptr addrspace (1 ) %out ) {
13821382; SI-LABEL: cvt_ubyte0_or_multiuse:
13831383; SI: ; %bb.0: ; %bb
1384- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1384+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
13851385; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
13861386; SI-NEXT: v_mov_b32_e32 v1, 0
13871387; SI-NEXT: s_mov_b32 s6, 0
@@ -1401,7 +1401,7 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addr
14011401;
14021402; VI-LABEL: cvt_ubyte0_or_multiuse:
14031403; VI: ; %bb.0: ; %bb
1404- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1404+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
14051405; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
14061406; VI-NEXT: s_waitcnt lgkmcnt(0)
14071407; VI-NEXT: v_mov_b32_e32 v0, s0
0 commit comments