@@ -3077,11 +3077,9 @@ define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) {
30773077; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
30783078; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
30793079; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3080- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3081- ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
3082- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
30833080; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3084- ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
3081+ ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
3082+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1
30853083; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
30863084; GFX6-NEXT: s_setpc_b64 s[30:31]
30873085;
@@ -3235,9 +3233,7 @@ define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt)
32353233; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
32363234; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
32373235; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3238- ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
32393236; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
3240- ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
32413237; GFX6-NEXT: s_and_b32 s0, s1, 0xffff
32423238; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
32433239; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
@@ -3570,26 +3566,22 @@ define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
35703566; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5
35713567; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
35723568; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
3569+ ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
35733570; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4
35743571; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
35753572; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3576- ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
35773573; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3578- ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
35793574; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3580- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
35813575; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
35823576; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3577+ ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
35833578; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
35843579; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
35853580; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
3586- ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
35873581; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3588- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
35893582; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
35903583; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3591- ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
3592- ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
3584+ ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
35933585; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
35943586; GFX6-NEXT: s_setpc_b64 s[30:31]
35953587;
@@ -3735,32 +3727,28 @@ define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %
37353727; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
37363728; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
37373729; GFX6-NEXT: s_bfe_u32 s4, s2, 0xf0001
3738- ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
37393730; GFX6-NEXT: s_lshl_b32 s0, s0, 1
37403731; GFX6-NEXT: s_lshr_b32 s4, s4, 14
3741- ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3742- ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
37433732; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
37443733; GFX6-NEXT: s_or_b32 s0, s0, s4
37453734; GFX6-NEXT: s_lshl_b32 s2, s2, 1
3735+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3736+ ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
3737+ ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
37463738; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3747- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
37483739; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
37493740; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
3750- ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3751- ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
37523741; GFX6-NEXT: s_bfe_u32 s4, s3, 0xf0001
3753- ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3754- ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
3755- ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
3742+ ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
37563743; GFX6-NEXT: s_lshl_b32 s1, s1, 1
37573744; GFX6-NEXT: s_lshr_b32 s4, s4, 14
37583745; GFX6-NEXT: s_lshl_b32 s3, s3, 1
3759- ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
3746+ ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3747+ ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
3748+ ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
37603749; GFX6-NEXT: s_or_b32 s1, s1, s4
3761- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3750+ ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
37623751; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
3763- ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
37643752; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
37653753; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
37663754; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
@@ -4358,26 +4346,22 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
43584346; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8
43594347; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
43604348; GFX6-NEXT: v_or_b32_e32 v1, v1, v8
4349+ ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
43614350; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v6
43624351; GFX6-NEXT: v_and_b32_e32 v9, 15, v6
43634352; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4364- ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
43654353; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4366- ; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
43674354; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4368- ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
43694355; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
43704356; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
4357+ ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
43714358; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
43724359; GFX6-NEXT: v_and_b32_e32 v3, 15, v8
43734360; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8
4374- ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
43754361; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4376- ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
43774362; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
43784363; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
4379- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
4380- ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4364+ ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
43814365; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
43824366; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
43834367; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
@@ -4388,9 +4372,7 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
43884372; GFX6-NEXT: v_and_b32_e32 v5, 15, v4
43894373; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
43904374; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
4391- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
43924375; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4393- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
43944376; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2
43954377; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
43964378; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
@@ -4782,26 +4764,22 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
47824764; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10
47834765; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
47844766; GFX6-NEXT: v_or_b32_e32 v1, v1, v10
4767+ ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
47854768; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8
47864769; GFX6-NEXT: v_and_b32_e32 v11, 15, v8
47874770; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
4788- ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
47894771; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4790- ; GFX6-NEXT: v_and_b32_e32 v11, 0xffff, v11
47914772; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
4792- ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
47934773; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0
47944774; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
4775+ ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
47954776; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
47964777; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
47974778; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10
4798- ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
47994779; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4800- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
48014780; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
48024781; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
4803- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
4804- ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4782+ ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
48054783; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
48064784; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
48074785; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
@@ -4818,20 +4796,16 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
48184796; GFX6-NEXT: v_and_b32_e32 v8, 15, v6
48194797; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
48204798; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4821- ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
48224799; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
4823- ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
48244800; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2
48254801; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
48264802; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
48274803; GFX6-NEXT: v_and_b32_e32 v4, 15, v7
48284804; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
48294805; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4830- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
48314806; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
48324807; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
4833- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v6
4834- ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4808+ ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
48354809; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
48364810; GFX6-NEXT: s_setpc_b64 s[30:31]
48374811;
0 commit comments