@@ -1227,118 +1227,6 @@ class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
12271227 (inst $vaddr, $saddr, $offset, 0, $in)
12281228>;
12291229
1230- let OtherPredicates = [HasFlatAddressSpace] in {
1231-
1232- def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
1233- def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1234- def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
1235- def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1236- def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1237- def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1238- def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1239- def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1240- def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1241- def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1242- def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1243- def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1244- def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1245- def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1246- def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1247-
1248- def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
1249- def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1250-
1251- def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
1252- def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
1253-
1254- foreach vt = Reg32Types.types in {
1255- def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
1256- def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
1257- }
1258-
1259- foreach vt = VReg_64.RegTypes in {
1260- def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
1261- def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
1262- }
1263-
1264- def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
1265-
1266- foreach vt = VReg_128.RegTypes in {
1267- def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
1268- def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
1269- }
1270-
1271- def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
1272- def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
1273- def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
1274- def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
1275- def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
1276- def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
1277-
1278- foreach as = [ "flat", "global" ] in {
1279- defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
1280- defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
1281- defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
1282- defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
1283- defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
1284- defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
1285- defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
1286- defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
1287- defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
1288- defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
1289- defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
1290- defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
1291- defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
1292-
1293- defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
1294- defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
1295- defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
1296- defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
1297- defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
1298- defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
1299- defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
1300- defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
1301- defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
1302- defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
1303- defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
1304- defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
1305- defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1306- } // end foreach as
1307-
1308- let SubtargetPredicate = isGFX12Plus in {
1309- defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
1310-
1311- let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1312- defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1313- }
1314-
1315- def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1316- def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1317-
1318- let OtherPredicates = [HasD16LoadStore] in {
1319- def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
1320- def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
1321- }
1322-
1323- let OtherPredicates = [D16PreservesUnusedBits] in {
1324- def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1325- def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1326- def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1327- def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1328- def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1329- def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
1330-
1331- def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1332- def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1333- def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1334- def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1335- def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1336- def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1337- }
1338-
1339- } // End OtherPredicates = [HasFlatAddressSpace]
1340-
1341-
13421230multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
13431231 def : FlatLoadSignedPat <inst, node, vt> {
13441232 let AddedComplexity = 10;
@@ -1467,6 +1355,117 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
14671355 }
14681356}
14691357
1358+ let OtherPredicates = [HasFlatAddressSpace] in {
1359+ // Patterns pairing FLAT_LOAD_UBYTE/SBYTE/USHORT/SSHORT with the 8- and 16-bit *_flat load nodes (types i16 and i32), plus the v3i32 load_flat pattern.
1360+ def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
1361+ def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1362+ def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
1363+ def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1364+ def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
1365+ def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
1366+ def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1367+ def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
1368+ def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
1369+ def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
1370+ def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
1371+ def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
1372+ def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
1373+ def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1374+ def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1375+ // atomic_load_32_flat / atomic_load_64_flat use FLAT_LOAD_DWORD / FLAT_LOAD_DWORDX2.
1376+ def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
1377+ def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1378+ // truncstorei8_flat / truncstorei16_flat with type i32 use FLAT_STORE_BYTE / FLAT_STORE_SHORT.
1379+ def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
1380+ def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
1381+ // One load_flat and one store_flat pattern per type listed in Reg32Types.types.
1382+ foreach vt = Reg32Types.types in {
1383+ def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
1384+ def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
1385+ }
1386+ // Same pairing for every type in VReg_64.RegTypes, using the DWORDX2 instructions.
1387+ foreach vt = VReg_64.RegTypes in {
1388+ def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
1389+ def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
1390+ }
1391+ // v3i32 store_flat pattern; the v3i32 load_flat pattern is defined near the top of this block.
1392+ def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
1393+ // One load_flat and one store_flat pattern per type in VReg_128.RegTypes, using the DWORDX4 instructions.
1394+ foreach vt = VReg_128.RegTypes in {
1395+ def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
1396+ def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
1397+ }
1398+ // atomic_store_*_flat nodes; the 8- and 16-bit ones are instantiated for both i32 and i16.
1399+ def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
1400+ def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
1401+ def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
1402+ def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
1403+ def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
1404+ def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
1405+ // Each FlatAtomicPat below is instantiated twice: the node name is the string prefix with "flat", then "global", appended.
1406+ foreach as = [ "flat", "global" ] in {
1407+ defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
1408+ defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
1409+ defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
1410+ defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
1411+ defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
1412+ defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
1413+ defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
1414+ defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
1415+ defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
1416+ defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
1417+ defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
1418+ defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
1419+ defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
1420+ // i64 instantiations: same node-name prefixes, *_X2 instruction names (cmpswap passes v2i64 as the extra type).
1421+ defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
1422+ defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
1423+ defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
1424+ defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
1425+ defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
1426+ defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
1427+ defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
1428+ defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
1429+ defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
1430+ defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
1431+ defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
1432+ defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
1433+ defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
1434+ } // end foreach as
1435+ // FLAT_ATOMIC_COND_SUB_U32 patterns for int_amdgcn_atomic_cond_sub_u32, additionally gated by SubtargetPredicate = isGFX12Plus.
1436+ let SubtargetPredicate = isGFX12Plus in {
1437+ defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >;
1438+ // NOTE(review): this let assigns OtherPredicates again for the single defm that follows; presumably it replaces [HasFlatAddressSpace] rather than adding to it — confirm.
1439+ let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1440+ defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
1441+ }
1442+ // Store patterns for type i16: truncstorei8_flat and store_flat.
1443+ def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
1444+ def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
1445+ // NOTE(review): OtherPredicates is reassigned to [HasD16LoadStore] for the two *_hi16_flat store patterns; presumably HasFlatAddressSpace no longer applies to them — confirm this is intended.
1446+ let OtherPredicates = [HasD16LoadStore] in {
1447+ def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
1448+ def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
1449+ }
1450+ // FlatLoadPat_D16 patterns under OtherPredicates = [D16PreservesUnusedBits]; the *_d16_hi_flat nodes come first, each for v2i16 and v2f16.
1451+ let OtherPredicates = [D16PreservesUnusedBits] in {
1452+ def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
1453+ def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
1454+ def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
1455+ def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
1456+ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
1457+ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
1458+ // The *_d16_lo_flat nodes, again for v2i16 and v2f16, use the instructions without the _HI suffix.
1459+ def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
1460+ def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
1461+ def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
1462+ def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
1463+ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
1464+ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
1465+ }
1466+
1467+ } // End OtherPredicates = [HasFlatAddressSpace]
1468+
14701469let OtherPredicates = [HasFlatGlobalInsts] in {
14711470
14721471defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>;
0 commit comments