@@ -416,298 +416,4 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
416416 ret <2 x i16 > %ret
417417}
418418
; Returning f32 flat atomic fadd intrinsic with a positive address offset.
; %ptr is advanced by 1023 float elements (1023 * 4 = 4092 bytes) before the
; call to @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32, and the call's result is
; returned. Both the GFX940 and GFX12 check blocks expect that displacement to
; appear as offset:4092 on a single flat_atomic_add_f32, with no separate
; address arithmetic.
419- define float @flat_atomic_fadd_f32_intrinsic_ret__posoffset (ptr %ptr , float %data ) {
420- ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_ret__posoffset:
421- ; GFX940: ; %bb.0:
422- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423- ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:4092 sc0
424- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
425- ; GFX940-NEXT: s_setpc_b64 s[30:31]
426- ;
427- ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_ret__posoffset:
428- ; GFX12: ; %bb.0:
429- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
430- ; GFX12-NEXT: s_wait_expcnt 0x0
431- ; GFX12-NEXT: s_wait_samplecnt 0x0
432- ; GFX12-NEXT: s_wait_bvhcnt 0x0
433- ; GFX12-NEXT: s_wait_kmcnt 0x0
434- ; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
435- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
436- ; GFX12-NEXT: s_setpc_b64 s[30:31]
437- %gep = getelementptr float , ptr %ptr , i64 1023
438- %result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
439- ret float %result
440- }
441-
; Returning f32 flat atomic fadd intrinsic with a negative address offset.
; %ptr is moved back by 256 float elements (-1024 bytes) before the call, and
; the call's result is returned. The two check blocks differ in how that
; displacement is expected to be handled:
;   - GFX940: v0/v1 are adjusted first (v_add_co_u32 with 0xfffffc00, then
;     v_addc_co_u32 with -1 and the carry), and the atomic has no offset.
;   - GFX12: the atomic itself carries offset:-1024.
442- define float @flat_atomic_fadd_f32_intrinsic_ret__negoffset (ptr %ptr , float %data ) {
443- ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_ret__negoffset:
444- ; GFX940: ; %bb.0:
445- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
447- ; GFX940-NEXT: s_nop 1
448- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
449- ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0
450- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
451- ; GFX940-NEXT: s_setpc_b64 s[30:31]
452- ;
453- ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_ret__negoffset:
454- ; GFX12: ; %bb.0:
455- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
456- ; GFX12-NEXT: s_wait_expcnt 0x0
457- ; GFX12-NEXT: s_wait_samplecnt 0x0
458- ; GFX12-NEXT: s_wait_bvhcnt 0x0
459- ; GFX12-NEXT: s_wait_kmcnt 0x0
460- ; GFX12-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
461- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
462- ; GFX12-NEXT: s_setpc_b64 s[30:31]
463- %gep = getelementptr float , ptr %ptr , i64 -256
464- %result = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
465- ret float %result
466- }
467-
; Non-returning use of the f32 flat atomic fadd intrinsic with a positive
; address offset. %ptr is advanced by 1023 float elements (4092 bytes); the
; call's result (%unused) is never read and the function returns void.
; Both check blocks expect flat_atomic_add_f32 in the form without a
; destination register (operands v[0:1], v2) and with offset:4092 on the
; instruction itself.
468- define void @flat_atomic_fadd_f32_intrinsic_noret__posoffset (ptr %ptr , float %data ) {
469- ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_noret__posoffset:
470- ; GFX940: ; %bb.0:
471- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472- ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:4092
473- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
474- ; GFX940-NEXT: s_setpc_b64 s[30:31]
475- ;
476- ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_noret__posoffset:
477- ; GFX12: ; %bb.0:
478- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
479- ; GFX12-NEXT: s_wait_expcnt 0x0
480- ; GFX12-NEXT: s_wait_samplecnt 0x0
481- ; GFX12-NEXT: s_wait_bvhcnt 0x0
482- ; GFX12-NEXT: s_wait_kmcnt 0x0
483- ; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:4092
484- ; GFX12-NEXT: s_wait_dscnt 0x0
485- ; GFX12-NEXT: s_setpc_b64 s[30:31]
486- %gep = getelementptr float , ptr %ptr , i64 1023
487- %unused = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
488- ret void
489- }
490-
; Non-returning use of the f32 flat atomic fadd intrinsic with a negative
; address offset. %ptr is moved back by 256 float elements (-1024 bytes); the
; call's result (%unused) is never read and the function returns void.
; Expected code per check block:
;   - GFX940: v0/v1 are adjusted first (v_add_co_u32 with 0xfffffc00, then
;     v_addc_co_u32 with -1 and the carry), followed by flat_atomic_add_f32
;     with no offset and no destination register.
;   - GFX12: flat_atomic_add_f32 with offset:-1024 and no destination register.
491- define void @flat_atomic_fadd_f32_intrinsic_noret__negoffset (ptr %ptr , float %data ) {
492- ; GFX940-LABEL: flat_atomic_fadd_f32_intrinsic_noret__negoffset:
493- ; GFX940: ; %bb.0:
494- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
496- ; GFX940-NEXT: s_nop 1
497- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
498- ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2
499- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
500- ; GFX940-NEXT: s_setpc_b64 s[30:31]
501- ;
502- ; GFX12-LABEL: flat_atomic_fadd_f32_intrinsic_noret__negoffset:
503- ; GFX12: ; %bb.0:
504- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
505- ; GFX12-NEXT: s_wait_expcnt 0x0
506- ; GFX12-NEXT: s_wait_samplecnt 0x0
507- ; GFX12-NEXT: s_wait_bvhcnt 0x0
508- ; GFX12-NEXT: s_wait_kmcnt 0x0
509- ; GFX12-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:-1024
510- ; GFX12-NEXT: s_wait_dscnt 0x0
511- ; GFX12-NEXT: s_setpc_b64 s[30:31]
512- %gep = getelementptr float , ptr %ptr , i64 -256
513- %unused = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32 (ptr %gep , float %data )
514- ret void
515- }
516-
; Returning <2 x half> flat atomic fadd intrinsic with a positive address
; offset. %ptr is advanced by 1023 <2 x half> elements (1023 * 4 = 4092 bytes)
; before the call to @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16, and the
; call's result is returned. Both check blocks expect a single
; flat_atomic_pk_add_f16 carrying offset:4092, with no separate address
; arithmetic.
517- define <2 x half > @flat_atomic_fadd_v2f16_intrinsic_ret__posoffset (ptr %ptr , <2 x half > %data ) {
518- ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__posoffset:
519- ; GFX940: ; %bb.0:
520- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521- ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:4092 sc0
522- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
523- ; GFX940-NEXT: s_setpc_b64 s[30:31]
524- ;
525- ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__posoffset:
526- ; GFX12: ; %bb.0:
527- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
528- ; GFX12-NEXT: s_wait_expcnt 0x0
529- ; GFX12-NEXT: s_wait_samplecnt 0x0
530- ; GFX12-NEXT: s_wait_bvhcnt 0x0
531- ; GFX12-NEXT: s_wait_kmcnt 0x0
532- ; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
533- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
534- ; GFX12-NEXT: s_setpc_b64 s[30:31]
535- %gep = getelementptr <2 x half >, ptr %ptr , i64 1023
536- %result = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
537- ret <2 x half > %result
538- }
539-
; Returning <2 x half> flat atomic fadd intrinsic with a negative address
; offset. %ptr is moved back by 256 <2 x half> elements (-1024 bytes) before
; the call, and the call's result is returned. Expected code per check block:
;   - GFX940: v0/v1 are adjusted first (v_add_co_u32 with 0xfffffc00, then
;     v_addc_co_u32 with -1 and the carry), and flat_atomic_pk_add_f16 has no
;     offset.
;   - GFX12: flat_atomic_pk_add_f16 itself carries offset:-1024.
540- define <2 x half > @flat_atomic_fadd_v2f16_intrinsic_ret__negoffset (ptr %ptr , <2 x half > %data ) {
541- ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__negoffset:
542- ; GFX940: ; %bb.0:
543- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
545- ; GFX940-NEXT: s_nop 1
546- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
547- ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0
548- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
549- ; GFX940-NEXT: s_setpc_b64 s[30:31]
550- ;
551- ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_ret__negoffset:
552- ; GFX12: ; %bb.0:
553- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
554- ; GFX12-NEXT: s_wait_expcnt 0x0
555- ; GFX12-NEXT: s_wait_samplecnt 0x0
556- ; GFX12-NEXT: s_wait_bvhcnt 0x0
557- ; GFX12-NEXT: s_wait_kmcnt 0x0
558- ; GFX12-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
559- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
560- ; GFX12-NEXT: s_setpc_b64 s[30:31]
561- %gep = getelementptr <2 x half >, ptr %ptr , i64 -256
562- %result = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
563- ret <2 x half > %result
564- }
565-
; Non-returning use of the <2 x half> flat atomic fadd intrinsic with a
; positive address offset. %ptr is advanced by 1023 <2 x half> elements
; (4092 bytes); the call's result (%unused) is never read and the function
; returns void. Both check blocks expect flat_atomic_pk_add_f16 in the form
; without a destination register (operands v[0:1], v2) and with offset:4092.
566- define void @flat_atomic_fadd_v2f16_intrinsic_noret__posoffset (ptr %ptr , <2 x half > %data ) {
567- ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__posoffset:
568- ; GFX940: ; %bb.0:
569- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570- ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:4092
571- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
572- ; GFX940-NEXT: s_setpc_b64 s[30:31]
573- ;
574- ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__posoffset:
575- ; GFX12: ; %bb.0:
576- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
577- ; GFX12-NEXT: s_wait_expcnt 0x0
578- ; GFX12-NEXT: s_wait_samplecnt 0x0
579- ; GFX12-NEXT: s_wait_bvhcnt 0x0
580- ; GFX12-NEXT: s_wait_kmcnt 0x0
581- ; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:4092
582- ; GFX12-NEXT: s_wait_dscnt 0x0
583- ; GFX12-NEXT: s_setpc_b64 s[30:31]
584- %gep = getelementptr <2 x half >, ptr %ptr , i64 1023
585- %unused = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
586- ret void
587- }
588-
; Non-returning use of the <2 x half> flat atomic fadd intrinsic with a
; negative address offset. %ptr is moved back by 256 <2 x half> elements
; (-1024 bytes); the call's result (%unused) is never read and the function
; returns void. Expected code per check block:
;   - GFX940: v0/v1 are adjusted first (v_add_co_u32 with 0xfffffc00, then
;     v_addc_co_u32 with -1 and the carry), followed by flat_atomic_pk_add_f16
;     with no offset and no destination register.
;   - GFX12: flat_atomic_pk_add_f16 with offset:-1024 and no destination
;     register.
589- define void @flat_atomic_fadd_v2f16_intrinsic_noret__negoffset (ptr %ptr , <2 x half > %data ) {
590- ; GFX940-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__negoffset:
591- ; GFX940: ; %bb.0:
592- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
594- ; GFX940-NEXT: s_nop 1
595- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
596- ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2
597- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
598- ; GFX940-NEXT: s_setpc_b64 s[30:31]
599- ;
600- ; GFX12-LABEL: flat_atomic_fadd_v2f16_intrinsic_noret__negoffset:
601- ; GFX12: ; %bb.0:
602- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
603- ; GFX12-NEXT: s_wait_expcnt 0x0
604- ; GFX12-NEXT: s_wait_samplecnt 0x0
605- ; GFX12-NEXT: s_wait_bvhcnt 0x0
606- ; GFX12-NEXT: s_wait_kmcnt 0x0
607- ; GFX12-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:-1024
608- ; GFX12-NEXT: s_wait_dscnt 0x0
609- ; GFX12-NEXT: s_setpc_b64 s[30:31]
610- %gep = getelementptr <2 x half >, ptr %ptr , i64 -256
611- %unused = call <2 x half > @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16 (ptr %gep , <2 x half > %data )
612- ret void
613- }
614-
; Returning v2bf16 flat atomic fadd intrinsic with a positive address offset.
; The data operand and result are typed <2 x i16> in the IR, while the
; intrinsic called is @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16.
; %ptr is advanced by 1023 <2 x i16> elements (1023 * 4 = 4092 bytes) before
; the call, and the call's result is returned. Both check blocks expect a
; single flat_atomic_pk_add_bf16 carrying offset:4092, with no separate
; address arithmetic.
615- define <2 x i16 > @flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset (ptr %ptr , <2 x i16 > %data ) {
616- ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset:
617- ; GFX940: ; %bb.0:
618- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:4092 sc0
620- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
621- ; GFX940-NEXT: s_setpc_b64 s[30:31]
622- ;
623- ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__posoffset:
624- ; GFX12: ; %bb.0:
625- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
626- ; GFX12-NEXT: s_wait_expcnt 0x0
627- ; GFX12-NEXT: s_wait_samplecnt 0x0
628- ; GFX12-NEXT: s_wait_bvhcnt 0x0
629- ; GFX12-NEXT: s_wait_kmcnt 0x0
630- ; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:4092 th:TH_ATOMIC_RETURN
631- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
632- ; GFX12-NEXT: s_setpc_b64 s[30:31]
633- %gep = getelementptr <2 x i16 >, ptr %ptr , i64 1023
634- %result = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
635- ret <2 x i16 > %result
636- }
637-
; Returning v2bf16 flat atomic fadd intrinsic with a negative address offset.
; The data operand and result are typed <2 x i16> in the IR. %ptr is moved
; back by 256 <2 x i16> elements (-1024 bytes) before the call, and the call's
; result is returned. Expected code per check block:
;   - GFX940: v0/v1 are adjusted first (v_add_co_u32 with 0xfffffc00, then
;     v_addc_co_u32 with -1 and the carry), and flat_atomic_pk_add_bf16 has no
;     offset.
;   - GFX12: flat_atomic_pk_add_bf16 itself carries offset:-1024.
638- define <2 x i16 > @flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset (ptr %ptr , <2 x i16 > %data ) {
639- ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset:
640- ; GFX940: ; %bb.0:
641- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
643- ; GFX940-NEXT: s_nop 1
644- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
645- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0
646- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
647- ; GFX940-NEXT: s_setpc_b64 s[30:31]
648- ;
649- ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_ret__negoffset:
650- ; GFX12: ; %bb.0:
651- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
652- ; GFX12-NEXT: s_wait_expcnt 0x0
653- ; GFX12-NEXT: s_wait_samplecnt 0x0
654- ; GFX12-NEXT: s_wait_bvhcnt 0x0
655- ; GFX12-NEXT: s_wait_kmcnt 0x0
656- ; GFX12-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:-1024 th:TH_ATOMIC_RETURN
657- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
658- ; GFX12-NEXT: s_setpc_b64 s[30:31]
659- %gep = getelementptr <2 x i16 >, ptr %ptr , i64 -256
660- %result = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
661- ret <2 x i16 > %result
662- }
663-
; Non-returning use of the v2bf16 flat atomic fadd intrinsic with a positive
; address offset. The data operand is typed <2 x i16> in the IR. %ptr is
; advanced by 1023 <2 x i16> elements (4092 bytes); the call's result
; (%unused) is never read and the function returns void. Both check blocks
; expect flat_atomic_pk_add_bf16 in the form without a destination register
; (operands v[0:1], v2) and with offset:4092.
664- define void @flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset (ptr %ptr , <2 x i16 > %data ) {
665- ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset:
666- ; GFX940: ; %bb.0:
667- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:4092
669- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
670- ; GFX940-NEXT: s_setpc_b64 s[30:31]
671- ;
672- ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__posoffset:
673- ; GFX12: ; %bb.0:
674- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
675- ; GFX12-NEXT: s_wait_expcnt 0x0
676- ; GFX12-NEXT: s_wait_samplecnt 0x0
677- ; GFX12-NEXT: s_wait_bvhcnt 0x0
678- ; GFX12-NEXT: s_wait_kmcnt 0x0
679- ; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:4092
680- ; GFX12-NEXT: s_wait_dscnt 0x0
681- ; GFX12-NEXT: s_setpc_b64 s[30:31]
682- %gep = getelementptr <2 x i16 >, ptr %ptr , i64 1023
683- %unused = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
684- ret void
685- }
686-
; Non-returning use of the v2bf16 flat atomic fadd intrinsic with a negative
; address offset. The data operand is typed <2 x i16> in the IR. %ptr is moved
; back by 256 <2 x i16> elements (-1024 bytes); the call's result (%unused) is
; never read and the function returns void. Expected code per check block:
;   - GFX940: v0/v1 are adjusted first (v_add_co_u32 with 0xfffffc00, then
;     v_addc_co_u32 with -1 and the carry), followed by
;     flat_atomic_pk_add_bf16 with no offset and no destination register.
;   - GFX12: flat_atomic_pk_add_bf16 with offset:-1024 and no destination
;     register.
687- define void @flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset (ptr %ptr , <2 x i16 > %data ) {
688- ; GFX940-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset:
689- ; GFX940: ; %bb.0:
690- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691- ; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffc00, v0
692- ; GFX940-NEXT: s_nop 1
693- ; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
694- ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2
695- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
696- ; GFX940-NEXT: s_setpc_b64 s[30:31]
697- ;
698- ; GFX12-LABEL: flat_atomic_fadd_v2bf16_intrinsic_noret__negoffset:
699- ; GFX12: ; %bb.0:
700- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
701- ; GFX12-NEXT: s_wait_expcnt 0x0
702- ; GFX12-NEXT: s_wait_samplecnt 0x0
703- ; GFX12-NEXT: s_wait_bvhcnt 0x0
704- ; GFX12-NEXT: s_wait_kmcnt 0x0
705- ; GFX12-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:-1024
706- ; GFX12-NEXT: s_wait_dscnt 0x0
707- ; GFX12-NEXT: s_setpc_b64 s[30:31]
708- %gep = getelementptr <2 x i16 >, ptr %ptr , i64 -256
709- %unused = call <2 x i16 > @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0.v2bf16 (ptr %gep , <2 x i16 > %data )
710- ret void
711- }
712-
; Attribute group #0: sets "denormal-fp-math-f32" to "ieee,ieee".
; NOTE(review): none of the flat_atomic_fadd_*_intrinsic_* functions visible in
; this hunk reference #0; its users are presumably earlier in the file — confirm.
713419attributes #0 = { "denormal-fp-math-f32" ="ieee,ieee" }
0 commit comments