@@ -2256,264 +2256,6 @@ main_body:
22562256 ret double %ret
22572257}
22582258
; Returning flat fadd f64 intrinsic addressed at %ptr + 511 doubles
; (511 * 8 = 4088 bytes). The atomic consumes %gep rather than %ptr so the
; positive offset is really part of the tested address; the expected
; instructions carry that byte offset as an immediate.
; NOTE(review): assertions were updated by hand for the offset operand;
; regenerate with utils/update_llc_test_checks.py to confirm.
define double @flat_atomic_fadd_f64_intrinsic_rtn__posoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__posoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4088 glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__posoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4088 sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr double, ptr %ptr, i64 511
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %gep, double %data)
  ret double %ret
}
2277-
; Returning flat fadd f64 intrinsic on an address 511 doubles below %ptr
; (-511 * 8 = -4088 bytes, 0xfffff008 in the low dword). The expected code
; forms the address with an explicit 64-bit add / add-with-carry pair
; before issuing the atomic.
define double @flat_atomic_fadd_f64_intrinsic_rtn__negoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__negoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_rtn__negoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %addr = getelementptr double, ptr %ptr, i64 -511
  %old = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %addr, double %data)
  ret double %old
}
2301-
; Non-returning flat fadd f64 intrinsic (result unused) addressed at
; %ptr + 511 doubles (511 * 8 = 4088 bytes). The atomic consumes %gep rather
; than %ptr so the positive offset is really part of the tested address; the
; expected instructions carry that byte offset as an immediate.
; NOTE(review): assertions were updated by hand for the offset operand;
; regenerate with utils/update_llc_test_checks.py to confirm.
define void @flat_atomic_fadd_f64_intrinsic_noret__posoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_noret__posoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] offset:4088
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_noret__posoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] offset:4088
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr double, ptr %ptr, i64 511
  %unused = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %gep, double %data)
  ret void
}
2320-
; Non-returning flat fadd f64 intrinsic (result discarded) on an address
; 511 doubles below %ptr (-511 * 8 = -4088 bytes, 0xfffff008 in the low
; dword). The expected code forms the address with an explicit 64-bit
; add / add-with-carry pair before issuing the atomic.
define void @flat_atomic_fadd_f64_intrinsic_noret__negoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fadd_f64_intrinsic_noret__negoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fadd_f64_intrinsic_noret__negoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3]
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %addr = getelementptr double, ptr %ptr, i64 -511
  %discard = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %addr, double %data)
  ret void
}
2344-
; Returning flat fmin f64 intrinsic addressed at %ptr + 511 doubles
; (511 * 8 = 4088 bytes). The atomic consumes %gep rather than %ptr so the
; positive offset is really part of the tested address; the expected
; instructions carry that byte offset as an immediate.
; NOTE(review): assertions were updated by hand for the offset operand;
; regenerate with utils/update_llc_test_checks.py to confirm.
define double @flat_atomic_fmin_f64_intrinsic_rtn__posoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__posoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] offset:4088 glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__posoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] offset:4088 sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr double, ptr %ptr, i64 511
  %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %gep, double %data)
  ret double %ret
}
2363-
; Returning flat fmin f64 intrinsic on an address 511 doubles below %ptr
; (-511 * 8 = -4088 bytes, 0xfffff008 in the low dword). The expected code
; forms the address with an explicit 64-bit add / add-with-carry pair
; before issuing the atomic.
define double @flat_atomic_fmin_f64_intrinsic_rtn__negoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__negoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_rtn__negoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %addr = getelementptr double, ptr %ptr, i64 -511
  %old = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %addr, double %data)
  ret double %old
}
2387-
; Non-returning flat fmin f64 intrinsic (result unused) addressed at
; %ptr + 511 doubles (511 * 8 = 4088 bytes). The atomic consumes %gep rather
; than %ptr so the positive offset is really part of the tested address; the
; expected instructions carry that byte offset as an immediate.
; NOTE(review): assertions were updated by hand for the offset operand;
; regenerate with utils/update_llc_test_checks.py to confirm.
define void @flat_atomic_fmin_f64_intrinsic_noret__posoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_noret__posoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3] offset:4088
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_noret__posoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3] offset:4088
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr double, ptr %ptr, i64 511
  %unused = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %gep, double %data)
  ret void
}
2406-
; Non-returning flat fmin f64 intrinsic (result discarded) on an address
; 511 doubles below %ptr (-511 * 8 = -4088 bytes, 0xfffff008 in the low
; dword). The expected code forms the address with an explicit 64-bit
; add / add-with-carry pair before issuing the atomic.
define void @flat_atomic_fmin_f64_intrinsic_noret__negoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fmin_f64_intrinsic_noret__negoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fmin_f64_intrinsic_noret__negoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %addr = getelementptr double, ptr %ptr, i64 -511
  %discard = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %addr, double %data)
  ret void
}
2430-
; Returning flat fmax f64 intrinsic addressed at %ptr + 511 doubles
; (511 * 8 = 4088 bytes). The atomic consumes %gep rather than %ptr so the
; positive offset is really part of the tested address; the expected
; instructions carry that byte offset as an immediate.
; NOTE(review): assertions were updated by hand for the offset operand;
; regenerate with utils/update_llc_test_checks.py to confirm.
define double @flat_atomic_fmax_f64_intrinsic_rtn__posoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__posoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] offset:4088 glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__posoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] offset:4088 sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr double, ptr %ptr, i64 511
  %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %gep, double %data)
  ret double %ret
}
2449-
; Returning flat fmax f64 intrinsic on an address 511 doubles below %ptr
; (-511 * 8 = -4088 bytes, 0xfffff008 in the low dword). The expected code
; forms the address with an explicit 64-bit add / add-with-carry pair
; before issuing the atomic.
define double @flat_atomic_fmax_f64_intrinsic_rtn__negoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__negoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_rtn__negoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %addr = getelementptr double, ptr %ptr, i64 -511
  %old = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %addr, double %data)
  ret double %old
}
2473-
; Non-returning flat fmax f64 intrinsic (result unused) addressed at
; %ptr + 511 doubles (511 * 8 = 4088 bytes). The atomic consumes %gep rather
; than %ptr so the positive offset is really part of the tested address; the
; expected instructions carry that byte offset as an immediate.
; NOTE(review): assertions were updated by hand for the offset operand;
; regenerate with utils/update_llc_test_checks.py to confirm.
define void @flat_atomic_fmax_f64_intrinsic_noret__posoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_noret__posoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3] offset:4088
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_noret__posoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3] offset:4088
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr double, ptr %ptr, i64 511
  %unused = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %gep, double %data)
  ret void
}
2492-
; Non-returning flat fmax f64 intrinsic (result discarded) on an address
; 511 doubles below %ptr (-511 * 8 = -4088 bytes, 0xfffff008 in the low
; dword). The expected code forms the address with an explicit 64-bit
; add / add-with-carry pair before issuing the atomic.
define void @flat_atomic_fmax_f64_intrinsic_noret__negoffset(ptr %ptr, double %data) #1 {
; GFX90A-LABEL: flat_atomic_fmax_f64_intrinsic_noret__negoffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX90A-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX90A-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_atomic_fmax_f64_intrinsic_noret__negoffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff008, v0
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX940-NEXT:    flat_atomic_max_f64 v[0:1], v[2:3]
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %addr = getelementptr double, ptr %ptr, i64 -511
  %discard = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %addr, double %data)
  ret void
}
2516-
25172259attributes #0 = { "denormal-fp-math" ="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics" ="true" }
25182260attributes #1 = { "amdgpu-unsafe-fp-atomics" ="true" }
25192261attributes #2 = { "denormal-fp-math" ="ieee,ieee" "amdgpu-unsafe-fp-atomics" ="true" }
0 commit comments