@@ -1021,20 +1021,20 @@ main_body:
10211021define amdgpu_kernel void @global_atomic_fadd_f64_noret (ptr addrspace (1 ) %ptr , double %data ) {
10221022; GFX90A-LABEL: global_atomic_fadd_f64_noret:
10231023; GFX90A: ; %bb.0: ; %main_body
1024- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1024+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
10251025; GFX90A-NEXT: v_mov_b32_e32 v2, 0
10261026; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1027- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3 ], s[2:3 ] op_sel:[0,1]
1028- ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1 ]
1027+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7 ], s[6:7 ] op_sel:[0,1]
1028+ ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
10291029; GFX90A-NEXT: s_endpgm
10301030;
10311031; GFX940-LABEL: global_atomic_fadd_f64_noret:
10321032; GFX940: ; %bb.0: ; %main_body
1033- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1033+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
10341034; GFX940-NEXT: v_mov_b32_e32 v2, 0
10351035; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1036- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3 ]
1037- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1 ]
1036+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7 ]
1037+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
10381038; GFX940-NEXT: s_endpgm
10391039main_body:
10401040 %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64 (ptr addrspace (1 ) %ptr , double %data )
@@ -1044,20 +1044,20 @@ main_body:
10441044define amdgpu_kernel void @global_atomic_fmin_f64_noret (ptr addrspace (1 ) %ptr , double %data ) {
10451045; GFX90A-LABEL: global_atomic_fmin_f64_noret:
10461046; GFX90A: ; %bb.0: ; %main_body
1047- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1047+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
10481048; GFX90A-NEXT: v_mov_b32_e32 v2, 0
10491049; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1050- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3 ], s[2:3 ] op_sel:[0,1]
1051- ; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1 ]
1050+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7 ], s[6:7 ] op_sel:[0,1]
1051+ ; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5 ]
10521052; GFX90A-NEXT: s_endpgm
10531053;
10541054; GFX940-LABEL: global_atomic_fmin_f64_noret:
10551055; GFX940: ; %bb.0: ; %main_body
1056- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1056+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
10571057; GFX940-NEXT: v_mov_b32_e32 v2, 0
10581058; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1059- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3 ]
1060- ; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1 ]
1059+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7 ]
1060+ ; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5 ]
10611061; GFX940-NEXT: s_endpgm
10621062main_body:
10631063 %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64 (ptr addrspace (1 ) %ptr , double %data )
@@ -1067,20 +1067,20 @@ main_body:
10671067define amdgpu_kernel void @global_atomic_fmax_f64_noret (ptr addrspace (1 ) %ptr , double %data ) {
10681068; GFX90A-LABEL: global_atomic_fmax_f64_noret:
10691069; GFX90A: ; %bb.0: ; %main_body
1070- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1070+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
10711071; GFX90A-NEXT: v_mov_b32_e32 v2, 0
10721072; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1073- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3 ], s[2:3 ] op_sel:[0,1]
1074- ; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1 ]
1073+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7 ], s[6:7 ] op_sel:[0,1]
1074+ ; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5 ]
10751075; GFX90A-NEXT: s_endpgm
10761076;
10771077; GFX940-LABEL: global_atomic_fmax_f64_noret:
10781078; GFX940: ; %bb.0: ; %main_body
1079- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1079+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
10801080; GFX940-NEXT: v_mov_b32_e32 v2, 0
10811081; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1082- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3 ]
1083- ; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1 ]
1082+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7 ]
1083+ ; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5 ]
10841084; GFX940-NEXT: s_endpgm
10851085main_body:
10861086 %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64 (ptr addrspace (1 ) %ptr , double %data )
@@ -1134,14 +1134,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
11341134; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
11351135; GFX940-NEXT: s_cbranch_execz .LBB39_2
11361136; GFX940-NEXT: ; %bb.1:
1137- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1137+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
11381138; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
11391139; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
11401140; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
11411141; GFX940-NEXT: v_mov_b32_e32 v2, 0
11421142; GFX940-NEXT: buffer_wbl2 sc0 sc1
11431143; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1144- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ] sc1
1144+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ] sc1
11451145; GFX940-NEXT: s_waitcnt vmcnt(0)
11461146; GFX940-NEXT: buffer_inv sc0 sc1
11471147; GFX940-NEXT: .LBB39_2:
@@ -1162,13 +1162,13 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
11621162; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
11631163; GFX90A-NEXT: s_cbranch_execz .LBB40_2
11641164; GFX90A-NEXT: ; %bb.1:
1165- ; GFX90A-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1165+ ; GFX90A-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
11661166; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
11671167; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
11681168; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
11691169; GFX90A-NEXT: v_mov_b32_e32 v2, 0
11701170; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1171- ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1171+ ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
11721172; GFX90A-NEXT: s_waitcnt vmcnt(0)
11731173; GFX90A-NEXT: buffer_wbinvl1_vol
11741174; GFX90A-NEXT: .LBB40_2:
@@ -1184,14 +1184,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
11841184; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
11851185; GFX940-NEXT: s_cbranch_execz .LBB40_2
11861186; GFX940-NEXT: ; %bb.1:
1187- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1187+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
11881188; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
11891189; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
11901190; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
11911191; GFX940-NEXT: v_mov_b32_e32 v2, 0
11921192; GFX940-NEXT: buffer_wbl2 sc1
11931193; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1194- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1194+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
11951195; GFX940-NEXT: s_waitcnt vmcnt(0)
11961196; GFX940-NEXT: buffer_inv sc1
11971197; GFX940-NEXT: .LBB40_2:
@@ -1248,14 +1248,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
12481248; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
12491249; GFX940-NEXT: s_cbranch_execz .LBB41_2
12501250; GFX940-NEXT: ; %bb.1:
1251- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1251+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
12521252; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
12531253; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
12541254; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
12551255; GFX940-NEXT: v_mov_b32_e32 v2, 0
12561256; GFX940-NEXT: buffer_wbl2 sc0 sc1
12571257; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1258- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ] sc1
1258+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ] sc1
12591259; GFX940-NEXT: s_waitcnt vmcnt(0)
12601260; GFX940-NEXT: buffer_inv sc0 sc1
12611261; GFX940-NEXT: .LBB41_2:
@@ -1276,13 +1276,13 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
12761276; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
12771277; GFX90A-NEXT: s_cbranch_execz .LBB42_2
12781278; GFX90A-NEXT: ; %bb.1:
1279- ; GFX90A-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1279+ ; GFX90A-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
12801280; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
12811281; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
12821282; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
12831283; GFX90A-NEXT: v_mov_b32_e32 v2, 0
12841284; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1285- ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1285+ ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
12861286; GFX90A-NEXT: s_waitcnt vmcnt(0)
12871287; GFX90A-NEXT: buffer_wbinvl1_vol
12881288; GFX90A-NEXT: .LBB42_2:
@@ -1298,14 +1298,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
12981298; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
12991299; GFX940-NEXT: s_cbranch_execz .LBB42_2
13001300; GFX940-NEXT: ; %bb.1:
1301- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1301+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
13021302; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
13031303; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
13041304; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
13051305; GFX940-NEXT: v_mov_b32_e32 v2, 0
13061306; GFX940-NEXT: buffer_wbl2 sc1
13071307; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1308- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1308+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
13091309; GFX940-NEXT: s_waitcnt vmcnt(0)
13101310; GFX940-NEXT: buffer_inv sc1
13111311; GFX940-NEXT: .LBB42_2:
@@ -1522,14 +1522,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
15221522; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
15231523; GFX940-NEXT: s_cbranch_execz .LBB49_2
15241524; GFX940-NEXT: ; %bb.1:
1525- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1525+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
15261526; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
15271527; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
15281528; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
15291529; GFX940-NEXT: v_mov_b32_e32 v2, 0
15301530; GFX940-NEXT: buffer_wbl2 sc1
15311531; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1532- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1532+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
15331533; GFX940-NEXT: s_waitcnt vmcnt(0)
15341534; GFX940-NEXT: buffer_inv sc1
15351535; GFX940-NEXT: .LBB49_2:
@@ -1761,19 +1761,19 @@ main_body:
17611761define amdgpu_kernel void @flat_atomic_fadd_f64_noret (ptr %ptr , double %data ) {
17621762; GFX90A-LABEL: flat_atomic_fadd_f64_noret:
17631763; GFX90A: ; %bb.0: ; %main_body
1764- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1764+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
17651765; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1766- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1 ], s[0:1 ] op_sel:[0,1]
1767- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3 ], s[2:3 ] op_sel:[0,1]
1766+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5 ], s[4:5 ] op_sel:[0,1]
1767+ ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7 ], s[6:7 ] op_sel:[0,1]
17681768; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
17691769; GFX90A-NEXT: s_endpgm
17701770;
17711771; GFX940-LABEL: flat_atomic_fadd_f64_noret:
17721772; GFX940: ; %bb.0: ; %main_body
1773- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1773+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
17741774; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1775- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1 ]
1776- ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3 ]
1775+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5 ]
1776+ ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7 ]
17771777; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
17781778; GFX940-NEXT: s_endpgm
17791779main_body:
@@ -1842,19 +1842,19 @@ main_body:
18421842define amdgpu_kernel void @flat_atomic_fmin_f64_noret (ptr %ptr , double %data ) {
18431843; GFX90A-LABEL: flat_atomic_fmin_f64_noret:
18441844; GFX90A: ; %bb.0: ; %main_body
1845- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1845+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
18461846; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1847- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1 ], s[0:1 ] op_sel:[0,1]
1848- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3 ], s[2:3 ] op_sel:[0,1]
1847+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5 ], s[4:5 ] op_sel:[0,1]
1848+ ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7 ], s[6:7 ] op_sel:[0,1]
18491849; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
18501850; GFX90A-NEXT: s_endpgm
18511851;
18521852; GFX940-LABEL: flat_atomic_fmin_f64_noret:
18531853; GFX940: ; %bb.0: ; %main_body
1854- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1854+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
18551855; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1856- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1 ]
1857- ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3 ]
1856+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5 ]
1857+ ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7 ]
18581858; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
18591859; GFX940-NEXT: s_endpgm
18601860main_body:
@@ -1884,19 +1884,19 @@ main_body:
18841884define amdgpu_kernel void @flat_atomic_fmax_f64_noret (ptr %ptr , double %data ) {
18851885; GFX90A-LABEL: flat_atomic_fmax_f64_noret:
18861886; GFX90A: ; %bb.0: ; %main_body
1887- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1887+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
18881888; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1889- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1 ], s[0:1 ] op_sel:[0,1]
1890- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3 ], s[2:3 ] op_sel:[0,1]
1889+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5 ], s[4:5 ] op_sel:[0,1]
1890+ ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7 ], s[6:7 ] op_sel:[0,1]
18911891; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
18921892; GFX90A-NEXT: s_endpgm
18931893;
18941894; GFX940-LABEL: flat_atomic_fmax_f64_noret:
18951895; GFX940: ; %bb.0: ; %main_body
1896- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1896+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
18971897; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1898- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1 ]
1899- ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3 ]
1898+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5 ]
1899+ ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7 ]
19001900; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
19011901; GFX940-NEXT: s_endpgm
19021902main_body:
0 commit comments