@@ -921,58 +921,60 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
921
921
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
922
922
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
923
923
; GCN-NEXT: s_waitcnt lgkmcnt(0)
924
- ; GCN-NEXT: s_ashr_i64 s[10:11 ], s[2:3], 31
925
- ; GCN-NEXT: s_ashr_i64 s[6:7 ], s[4:5], 31
926
- ; GCN-NEXT: s_ashr_i32 s4 , s5, 31
927
- ; GCN-NEXT: s_add_u32 s6, s6, s4
928
- ; GCN-NEXT: s_mov_b32 s5, s4
929
- ; GCN-NEXT: s_addc_u32 s7, s7, s4
930
- ; GCN-NEXT: s_xor_b64 s[8:9], s[6:7 ], s[4:5 ]
924
+ ; GCN-NEXT: s_ashr_i64 s[2:3 ], s[2:3], 31
925
+ ; GCN-NEXT: s_ashr_i64 s[4:5 ], s[4:5], 31
926
+ ; GCN-NEXT: s_ashr_i32 s6 , s5, 31
927
+ ; GCN-NEXT: s_add_u32 s4, s4, s6
928
+ ; GCN-NEXT: s_mov_b32 s7, s6
929
+ ; GCN-NEXT: s_addc_u32 s5, s5, s6
930
+ ; GCN-NEXT: s_xor_b64 s[8:9], s[4:5 ], s[6:7 ]
931
931
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s8
932
932
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9
933
- ; GCN-NEXT: s_sub_u32 s2 , 0, s8
934
- ; GCN-NEXT: s_subb_u32 s4 , 0, s9
935
- ; GCN-NEXT: s_ashr_i32 s12 , s3, 31
933
+ ; GCN-NEXT: s_sub_u32 s4 , 0, s8
934
+ ; GCN-NEXT: s_subb_u32 s5 , 0, s9
935
+ ; GCN-NEXT: s_ashr_i32 s10 , s3, 31
936
936
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
937
937
; GCN-NEXT: v_rcp_f32_e32 v0, v0
938
- ; GCN-NEXT: s_mov_b32 s13, s12
939
- ; GCN-NEXT: s_mov_b32 s5, s1
940
- ; GCN-NEXT: s_mov_b32 s7, 0xf000
938
+ ; GCN-NEXT: s_add_u32 s2, s2, s10
939
+ ; GCN-NEXT: s_mov_b32 s11, s10
940
+ ; GCN-NEXT: s_addc_u32 s3, s3, s10
941
941
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
942
942
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
943
943
; GCN-NEXT: v_trunc_f32_e32 v1, v1
944
944
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
945
945
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
946
946
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
947
+ ; GCN-NEXT: s_xor_b64 s[12:13], s[2:3], s[10:11]
948
+ ; GCN-NEXT: s_mov_b32 s7, 0xf000
949
+ ; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
950
+ ; GCN-NEXT: v_mul_hi_u32 v3, s4, v0
951
+ ; GCN-NEXT: v_mul_lo_u32 v5, s5, v0
952
+ ; GCN-NEXT: v_mul_lo_u32 v4, s4, v0
947
953
; GCN-NEXT: s_mov_b32 s6, -1
948
- ; GCN-NEXT: v_mul_lo_u32 v2, s2, v1
949
- ; GCN-NEXT: v_mul_hi_u32 v3, s2, v0
950
- ; GCN-NEXT: v_mul_lo_u32 v5, s4, v0
951
- ; GCN-NEXT: v_mul_lo_u32 v4, s2, v0
952
954
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3
953
955
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v5
954
956
; GCN-NEXT: v_mul_hi_u32 v3, v0, v4
955
957
; GCN-NEXT: v_mul_lo_u32 v5, v0, v2
956
- ; GCN-NEXT: v_mul_hi_u32 v6, v0, v2
957
- ; GCN-NEXT: v_mul_hi_u32 v7, v1, v2
958
- ; GCN-NEXT: v_mul_lo_u32 v2, v1, v2
959
- ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
960
- ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc
958
+ ; GCN-NEXT: v_mul_hi_u32 v7, v0, v2
961
959
; GCN-NEXT: v_mul_lo_u32 v6, v1, v4
962
960
; GCN-NEXT: v_mul_hi_u32 v4, v1, v4
961
+ ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
962
+ ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
963
+ ; GCN-NEXT: v_mul_hi_u32 v7, v1, v2
964
+ ; GCN-NEXT: v_mul_lo_u32 v2, v1, v2
963
965
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v6
964
966
; GCN-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc
965
967
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc
966
968
; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
967
969
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
968
970
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
969
971
; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
970
- ; GCN-NEXT: v_mul_lo_u32 v2, s2 , v1
971
- ; GCN-NEXT: v_mul_hi_u32 v3, s2 , v0
972
- ; GCN-NEXT: v_mul_lo_u32 v4, s4 , v0
973
- ; GCN-NEXT: s_mov_b32 s4, s0
972
+ ; GCN-NEXT: v_mul_lo_u32 v2, s4 , v1
973
+ ; GCN-NEXT: v_mul_hi_u32 v3, s4 , v0
974
+ ; GCN-NEXT: v_mul_lo_u32 v4, s5 , v0
975
+ ; GCN-NEXT: s_mov_b32 s5, s1
974
976
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3
975
- ; GCN-NEXT: v_mul_lo_u32 v3, s2 , v0
977
+ ; GCN-NEXT: v_mul_lo_u32 v3, s4 , v0
976
978
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
977
979
; GCN-NEXT: v_mul_lo_u32 v6, v0, v2
978
980
; GCN-NEXT: v_mul_hi_u32 v7, v0, v3
@@ -988,20 +990,18 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
988
990
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
989
991
; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
990
992
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
991
- ; GCN-NEXT: s_add_u32 s2, s10, s12
992
993
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
993
- ; GCN-NEXT: s_addc_u32 s3, s11, s12
994
994
; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
995
- ; GCN-NEXT: s_xor_b64 s[10:11], s[2:3], s[12:13]
996
- ; GCN-NEXT: v_mul_lo_u32 v2, s10, v1
997
- ; GCN-NEXT: v_mul_hi_u32 v3, s10, v0
998
- ; GCN-NEXT: v_mul_hi_u32 v4, s10, v1
999
- ; GCN-NEXT: v_mul_hi_u32 v5, s11, v1
1000
- ; GCN-NEXT: v_mul_lo_u32 v1, s11, v1
995
+ ; GCN-NEXT: v_mul_lo_u32 v2, s12, v1
996
+ ; GCN-NEXT: v_mul_hi_u32 v3, s12, v0
997
+ ; GCN-NEXT: v_mul_hi_u32 v4, s12, v1
998
+ ; GCN-NEXT: v_mul_hi_u32 v5, s13, v1
999
+ ; GCN-NEXT: v_mul_lo_u32 v1, s13, v1
1001
1000
; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
1002
1001
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
1003
- ; GCN-NEXT: v_mul_lo_u32 v4, s11, v0
1004
- ; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
1002
+ ; GCN-NEXT: v_mul_lo_u32 v4, s13, v0
1003
+ ; GCN-NEXT: v_mul_hi_u32 v0, s13, v0
1004
+ ; GCN-NEXT: s_mov_b32 s4, s0
1005
1005
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
1006
1006
; GCN-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc
1007
1007
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -1013,9 +1013,9 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
1013
1013
; GCN-NEXT: v_mul_lo_u32 v0, s8, v0
1014
1014
; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1015
1015
; GCN-NEXT: v_add_i32_e32 v1, vcc, v3, v1
1016
- ; GCN-NEXT: v_sub_i32_e32 v2, vcc, s11 , v1
1016
+ ; GCN-NEXT: v_sub_i32_e32 v2, vcc, s13 , v1
1017
1017
; GCN-NEXT: v_mov_b32_e32 v3, s9
1018
- ; GCN-NEXT: v_sub_i32_e32 v0, vcc, s10 , v0
1018
+ ; GCN-NEXT: v_sub_i32_e32 v0, vcc, s12 , v0
1019
1019
; GCN-NEXT: v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
1020
1020
; GCN-NEXT: v_subrev_i32_e64 v4, s[0:1], s8, v0
1021
1021
; GCN-NEXT: v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
@@ -1030,7 +1030,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
1030
1030
; GCN-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1]
1031
1031
; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6
1032
1032
; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1]
1033
- ; GCN-NEXT: v_mov_b32_e32 v4, s11
1033
+ ; GCN-NEXT: v_mov_b32_e32 v4, s13
1034
1034
; GCN-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
1035
1035
; GCN-NEXT: v_cmp_le_u32_e32 vcc, s9, v1
1036
1036
; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
@@ -1042,10 +1042,10 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
1042
1042
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
1043
1043
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1044
1044
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1045
- ; GCN-NEXT: v_xor_b32_e32 v0, s12 , v0
1046
- ; GCN-NEXT: v_xor_b32_e32 v1, s12 , v1
1047
- ; GCN-NEXT: v_mov_b32_e32 v2, s12
1048
- ; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s12 , v0
1045
+ ; GCN-NEXT: v_xor_b32_e32 v0, s10 , v0
1046
+ ; GCN-NEXT: v_xor_b32_e32 v1, s10 , v1
1047
+ ; GCN-NEXT: v_mov_b32_e32 v2, s10
1048
+ ; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s10 , v0
1049
1049
; GCN-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
1050
1050
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1051
1051
; GCN-NEXT: s_endpgm
0 commit comments