@@ -2794,6 +2794,41 @@ define hidden void @extract3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, pt
27942794 ret void
27952795}
27962796
; Declaration of the llvm.amdgcn.perm intrinsic: three i32 operands, i32 result.
; @extract_perm_3744 calls it twice, each time with a constant third operand.
2797+ declare i32 @llvm.amdgcn.perm (i32 , i32 , i32 )
2798+
; Codegen test: two llvm.amdgcn.perm calls on separately loaded dwords are
; OR'ed together and stored. The autogenerated CHECK lines below pin the
; expected output to a single v_perm_b32 with selector 0x3070404.
2799+ define hidden void @extract_perm_3744 (ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 , ptr addrspace (1 ) %out0 ) {
2800+ ; GFX10-LABEL: extract_perm_3744:
2801+ ; GFX10: ; %bb.0:
2802+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2803+ ; GFX10-NEXT: global_load_dword v6, v[0:1], off
2804+ ; GFX10-NEXT: global_load_dword v7, v[2:3], off
2805+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
2806+ ; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x3070404
2807+ ; GFX10-NEXT: global_store_dword v[4:5], v0, off
2808+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
2809+ ;
2810+ ; GFX9-LABEL: extract_perm_3744:
2811+ ; GFX9: ; %bb.0:
2812+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2813+ ; GFX9-NEXT: global_load_dword v6, v[0:1], off
2814+ ; GFX9-NEXT: global_load_dword v7, v[2:3], off
2815+ ; GFX9-NEXT: s_mov_b32 s4, 0x3070404
2816+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
2817+ ; GFX9-NEXT: v_perm_b32 v0, v6, v7, s4
2818+ ; GFX9-NEXT: global_store_dword v[4:5], v0, off
2819+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
2820+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Load one <4 x i8> from each input pointer and reinterpret each as an i32.
2821+ %vec1 = load <4 x i8 >, ptr addrspace (1 ) %in0 , align 4
2822+ %vec2 = load <4 x i8 >, ptr addrspace (1 ) %in1 , align 4
2823+ %cast1 = bitcast <4 x i8 > %vec1 to i32
2824+ %cast2 = bitcast <4 x i8 > %vec2 to i32
; Each perm call passes the same value as both data operands. The selectors are
; 201523200 (0x0C030000) for %cast1 and 51121164 (0x030C0C0C) for %cast2.
; NOTE(review): the value names suggest the first call keeps three bytes of
; %cast1 and the second keeps one byte of %cast2, with selector byte 0x0C
; yielding a zero byte -- confirm against the V_PERM_B32 ISA description.
2825+ %lo24 = call i32 @llvm.amdgcn.perm (i32 %cast1 , i32 %cast1 , i32 201523200 )
2826+ %hi8 = call i32 @llvm.amdgcn.perm (i32 %cast2 , i32 %cast2 , i32 51121164 )
; Merge the two partial results and write the dword to %out0. The CHECK lines
; show only one v_perm_b32 and no separate OR instruction on either target.
2827+ %res = or i32 %hi8 , %lo24
2828+ store i32 %res , ptr addrspace (1 ) %out0 , align 4
2829+ ret void
2830+ }
2831+
27972832define hidden void @extract1347_v2i16 (ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 , ptr addrspace (1 ) %out0 ) {
27982833; GFX10-LABEL: extract1347_v2i16:
27992834; GFX10: ; %bb.0:
0 commit comments