// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE
// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE
// RUN: %clang_cc1 -O0 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE-NOOPT

// CHECK-LE-LABEL: @test1(
// CHECK-LE-NEXT: entry:
// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
// CHECK-BE-NEXT: ret void
//
// CHECK-LE-NOOPT-LABEL: @test1(
// CHECK-LE-NOOPT-NEXT: entry:
// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[VC3_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[VC4_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3:%.*]], ptr [[VC3_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4:%.*]], ptr [[VC4_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32
// CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC3_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC4_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]], <16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP8]], ptr [[RES]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP9:%.*]] = load <512 x i1>, ptr [[RES]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP10:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP9]], ptr [[TMP10]], align 64
// CHECK-LE-NOOPT-NEXT: ret void
//
1957void test1 (unsigned char * vqp , unsigned char * vpp , vector unsigned char vc1 , vector unsigned char vc2 ,
2058 vector unsigned char vc3 , vector unsigned char vc4 , unsigned char * resp ) {
2159 __vector_quad vq = * ((__vector_quad * )vqp );
@@ -37,6 +75,36 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
// CHECK-BE-NEXT: ret void
//
// CHECK-LE-NOOPT-LABEL: @test2(
// CHECK-LE-NOOPT-NEXT: entry:
// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32
// CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP6]], ptr [[RES]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <256 x i1>, ptr [[RES]], align 32
// CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP7]], ptr [[TMP8]], align 32
// CHECK-LE-NOOPT-NEXT: ret void
//
40108void test2 (unsigned char * vqp , unsigned char * vpp , vector unsigned char vc1 ,
41109 vector unsigned char vc2 , unsigned char * resp ) {
42110 __vector_quad vq = * ((__vector_quad * )vqp );
0 commit comments