diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 27acdf284f2ee..aab19b8adc275 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1227,118 +1227,6 @@ class ScratchLoadSVaddrPat_D16 ; -let OtherPredicates = [HasFlatAddressSpace] in { - -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; - -def : FlatLoadPat ; -def : FlatLoadPat ; - -def : FlatStorePat ; -def : FlatStorePat ; - -foreach vt = Reg32Types.types in { -def : FlatLoadPat ; -def : FlatStorePat ; -} - -foreach vt = VReg_64.RegTypes in { -def : FlatStorePat ; -def : FlatLoadPat ; -} - -def : FlatStorePat ; - -foreach vt = VReg_128.RegTypes in { -def : FlatLoadPat ; -def : FlatStorePat ; -} - -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; - -foreach as = [ "flat", "global" ] in { -defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>; -defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; - -defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>; -defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>; -} // end foreach as - -let SubtargetPredicate = isGFX12Plus in { - defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >; - - let OtherPredicates = [HasAtomicCSubNoRtnInsts] in - defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>; -} - -def : FlatStorePat ; -def : FlatStorePat ; - -let OtherPredicates = [HasD16LoadStore] in { -def : FlatStorePat ; -def : FlatStorePat ; -} - -let OtherPredicates = [D16PreservesUnusedBits] in { -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; - -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -} - -} // End OtherPredicates = [HasFlatAddressSpace] - - multiclass GlobalFLATLoadPats { def : FlatLoadSignedPat { let AddedComplexity = 10; @@ -1467,6 +1355,117 @@ multiclass ScratchFLATLoadPats_D16; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; + +def : FlatLoadPat ; +def : FlatLoadPat ; + +def : FlatStorePat ; +def : FlatStorePat ; + +foreach vt = Reg32Types.types in { +def : FlatLoadPat ; +def : FlatStorePat ; +} + +foreach vt = VReg_64.RegTypes in { +def : FlatStorePat ; +def : FlatLoadPat ; +} + +def : FlatStorePat ; + +foreach vt = VReg_128.RegTypes in { +def : FlatLoadPat ; +def : FlatStorePat ; +} + +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; + +foreach as = [ "flat", "global" ] in { +defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>; +defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; + +defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>; +defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>; +} // end foreach as + +let SubtargetPredicate = isGFX12Plus in { + defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >; + + let OtherPredicates = [HasAtomicCSubNoRtnInsts] in + defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>; +} + +def : FlatStorePat ; +def : FlatStorePat ; + +let OtherPredicates = [HasD16LoadStore] in { +def : FlatStorePat ; +def : FlatStorePat ; +} + +let OtherPredicates = [D16PreservesUnusedBits] in { +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; + +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +def : FlatLoadPat_D16 ; +} + +} // End OtherPredicates = [HasFlatAddressSpace] + let OtherPredicates = [HasFlatGlobalInsts] in { defm : GlobalFLATLoadPats ;