Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE
// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE
// RUN: %clang_cc1 -O0 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE-NOOPT

// CHECK-LE-LABEL: @test1(
// CHECK-LE-NEXT: entry:
Expand All @@ -16,6 +18,42 @@
// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
// CHECK-BE-NEXT: ret void
//
// CHECK-LE-NOOPT-LABEL: @test1(
// CHECK-LE-NOOPT-NEXT: entry:
// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[VC3_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[VC4_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3:%.*]], ptr [[VC3_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4:%.*]], ptr [[VC4_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32
// CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC3_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC4_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]], <16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP8]], ptr [[RES]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP9:%.*]] = load <512 x i1>, ptr [[RES]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP10:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP9]], ptr [[TMP10]], align 64
// CHECK-LE-NOOPT-NEXT: ret void
//
void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vector unsigned char vc2,
vector unsigned char vc3, vector unsigned char vc4, unsigned char *resp) {
__vector_quad vq = *((__vector_quad *)vqp);
Expand All @@ -37,6 +75,36 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec
// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
// CHECK-BE-NEXT: ret void
//
// CHECK-LE-NOOPT-LABEL: @test2(
// CHECK-LE-NOOPT-NEXT: entry:
// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8
// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <256 x i1>, align 32
// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64
// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32
// CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16
// CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP5]], <16 x i8> [[TMP4]])
// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP6]], ptr [[RES]], align 64
// CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <256 x i1>, ptr [[RES]], align 32
// CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8
// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP7]], ptr [[TMP8]], align 32
// CHECK-LE-NOOPT-NEXT: ret void
//
void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1,
vector unsigned char vc2, unsigned char *resp) {
__vector_quad vq = *((__vector_quad *)vqp);
Expand Down
Loading