@@ -3851,9 +3851,9 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
38513851; VI-DS128-NEXT: v_and_b32_e32 v32, 0xffff, v24
38523852; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:80
38533853; VI-DS128-NEXT: ds_read_b128 v[55:58], v0 offset:96
3854- ; VI-DS128-NEXT: v_lshrrev_b32_e32 v15 , 16, v11
3854+ ; VI-DS128-NEXT: v_lshrrev_b32_e32 v31 , 16, v11
38553855; VI-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v10
3856- ; VI-DS128-NEXT: v_mov_b32_e32 v31, v15
3856+ ; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
38573857; VI-DS128-NEXT: s_waitcnt lgkmcnt(1)
38583858; VI-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v27
38593859; VI-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v26
@@ -3864,17 +3864,16 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
38643864; VI-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v25
38653865; VI-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v24
38663866; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:112
3867- ; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
38683867; VI-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
38693868; VI-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v11
38703869; VI-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v10
3870+ ; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
38713871; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
38723872; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v25
38733873; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v24
38743874; VI-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v25
38753875; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v24
38763876; VI-DS128-NEXT: v_mov_b32_e32 v24, s0
3877- ; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
38783877; VI-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
38793878; VI-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v39
38803879; VI-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v38
@@ -3944,7 +3943,7 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
39443943; GFX9-DS128-NEXT: s_addc_u32 s13, s13, 0
39453944; GFX9-DS128-NEXT: ds_read_b128 v[20:23], v0 offset:32
39463945; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2)
3947- ; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15 , 16, v11
3946+ ; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v31 , 16, v11
39483947; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
39493948; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v19
39503949; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v18
@@ -3992,8 +3991,8 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
39923991; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:80
39933992; GFX9-DS128-NEXT: ds_read_b128 v[55:58], v0 offset:96
39943993; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v10
3995- ; GFX9-DS128-NEXT: v_mov_b32_e32 v31, v15
39963994; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
3995+ ; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
39973996; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
39983997; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v27
39993998; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v26
@@ -4004,17 +4003,16 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
40044003; GFX9-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v25
40054004; GFX9-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v24
40064005; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:112
4007- ; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
40084006; GFX9-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v11
40094007; GFX9-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v10
40104008; GFX9-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
4009+ ; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
40114010; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
40124011; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v25
40134012; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v24
40144013; GFX9-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v25
40154014; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v24
40164015; GFX9-DS128-NEXT: v_mov_b32_e32 v24, s0
4017- ; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
40184016; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v39
40194017; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v38
40204018; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v46, 16, v37
@@ -4890,7 +4888,7 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
48904888; VI-DS128-NEXT: s_waitcnt lgkmcnt(2)
48914889; VI-DS128-NEXT: v_ashrrev_i32_e32 v53, 16, v40
48924890; VI-DS128-NEXT: v_bfe_i32 v52, v40, 0, 16
4893- ; VI-DS128-NEXT: v_ashrrev_i32_e32 v15 , 16, v11
4891+ ; VI-DS128-NEXT: v_ashrrev_i32_e32 v23 , 16, v11
48944892; VI-DS128-NEXT: s_waitcnt lgkmcnt(1)
48954893; VI-DS128-NEXT: v_ashrrev_i32_e32 v47, 16, v39
48964894; VI-DS128-NEXT: v_ashrrev_i32_e32 v45, 16, v38
@@ -4901,14 +4899,13 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
49014899; VI-DS128-NEXT: ds_read_b128 v[37:40], v32 offset:112
49024900; VI-DS128-NEXT: v_mov_b32_e32 v32, s0
49034901; VI-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v10
4904- ; VI-DS128-NEXT: v_mov_b32_e32 v23, v15
49054902; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v9
4903+ ; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
49064904; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
49074905; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v38
49084906; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v37
49094907; VI-DS128-NEXT: v_bfe_i32 v2, v38, 0, 16
49104908; VI-DS128-NEXT: v_bfe_i32 v0, v37, 0, 16
4911- ; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
49124909; VI-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
49134910; VI-DS128-NEXT: v_bfe_i32 v20, v10, 0, 16
49144911; VI-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
@@ -4986,7 +4983,7 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
49864983; GFX9-DS128-NEXT: s_addc_u32 s13, s13, 0
49874984; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v32 offset:32
49884985; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2)
4989- ; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15 , 16, v11
4986+ ; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v23 , 16, v11
49904987; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
49914988; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v19
49924989; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v18
@@ -5031,15 +5028,14 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
50315028; GFX9-DS128-NEXT: v_bfe_i32 v50, v37, 0, 16
50325029; GFX9-DS128-NEXT: ds_read_b128 v[37:40], v32 offset:112
50335030; GFX9-DS128-NEXT: v_mov_b32_e32 v32, s0
5034- ; GFX9-DS128-NEXT: v_mov_b32_e32 v23, v15
50355031; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v9
50365032; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
5033+ ; GFX9-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
50375034; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
50385035; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v38
50395036; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v37
50405037; GFX9-DS128-NEXT: v_bfe_i32 v2, v38, 0, 16
50415038; GFX9-DS128-NEXT: v_bfe_i32 v0, v37, 0, 16
5042- ; GFX9-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
50435039; GFX9-DS128-NEXT: v_bfe_i32 v20, v10, 0, 16
50445040; GFX9-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
50455041; GFX9-DS128-NEXT: v_bfe_i32 v12, v8, 0, 16
0 commit comments