Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/BuiltinsPPC.def
Original file line number Diff line number Diff line change
Expand Up @@ -1098,6 +1098,10 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmmr, "vW1024*W1024*", false,
"mma,isa-future-instructions")
UNALIASED_CUSTOM_BUILTIN(mma_dmxor, "vW1024*W1024*", true,
"mma,isa-future-instructions")
// __builtin_mma_disassemble_dmr: void builtin taking a destination pointer
// and a pointer to a 1024-bit DMR value (prototype "vv*W1024*"). Requires the
// "mma" and "isa-future-instructions" target features.
UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false,
"mma,isa-future-instructions")
// __builtin_mma_build_dmr: void builtin taking a pointer to a 1024-bit DMR
// result followed by eight vector operands (prototype "vW1024*VVVVVVVV").
// Requires the "mma" and "isa-future-instructions" target features.
UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false,
"mma,isa-future-instructions")

// MMA builtins with positive/negative multiply/accumulate.
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/CodeGen/TargetBuiltins/PPC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1152,10 +1152,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
CallOps.push_back(Acc);
}
if (BuiltinID == PPC::BI__builtin_mma_dmmr ||
BuiltinID == PPC::BI__builtin_mma_dmxor) {
BuiltinID == PPC::BI__builtin_mma_dmxor ||
BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) {
Address Addr = EmitPointerWithAlignment(E->getArg(1));
Ops[1] = Builder.CreateLoad(Addr);
}
if (BuiltinID == PPC::BI__builtin_mma_disassemble_dmr)
return Builder.CreateAlignedStore(Ops[1], Ops[0], MaybeAlign());
for (unsigned i=1; i<Ops.size(); i++)
CallOps.push_back(Ops[i]);
llvm::Function *F = CGM.getIntrinsic(ID);
Expand Down
36 changes: 27 additions & 9 deletions clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,18 +93,36 @@ void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsi
*((__dmr1024 *)resp) = vdmr;
}

// CHECK-LABEL: @test_dmf_basic
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr %res1, align 128
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr %res2, align 128
// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr %p, align 128
// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr %res2, align 128
// CHECK-LABEL: @test_dmf_basic(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1:%.*]], align 128
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2:%.*]], align 128
// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P:%.*]], align 128
// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128
// CHECK-NEXT: ret void
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems irrelevant to this PR.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is, but the change was made by the tool that updates the test checks, not by me, so I'm not sure there's a way around it. Note that the test itself has not been changed.

//
void test_dmf_basic(char *p, char *res1, char *res2) {
// Exercises the dmsetdmrz, dmmr and dmxor builtins in one function.
// Only x[0] of the local array is used.
__dmr1024 x[2];
__builtin_mma_dmsetdmrz(&x[0]);
// The dmmr result is written through res1 (reinterpreted as __dmr1024 *).
__builtin_mma_dmmr((__dmr1024*)res1, &x[0]);
// dmxor takes res2 as both an input and the destination; p is the other input.
__builtin_mma_dmxor((__dmr1024*)res2, (__dmr1024*)p);
}

// CHECK-LABEL: @test_dmf_basic2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V:%.*]], align 16, !tbaa [[TBAA8:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]])
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2:%.*]], align 128
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1:%.*]], align 128
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1:%.*]], align 128
// CHECK-NEXT: ret void
//
void test_dmf_basic2(char *p1, char *res1, char *res2,
vector unsigned char *v) {
// Exercises the build_dmr and disassemble_dmr builtins.
vector unsigned char vv = *v;
// Build a DMR from eight copies of the same vector and write it through res2.
__builtin_mma_build_dmr((__dmr1024*)res2, vv, vv, vv, vv, vv, vv, vv, vv);
// Disassemble the DMR pointed to by p1 into the buffer at res1; the expected
// IR for this function is a plain <1024 x i1> load from p1 and store to res1.
__builtin_mma_disassemble_dmr(res1, (__dmr1024*)p1);
}
4 changes: 4 additions & 0 deletions clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
__builtin_mma_dmsetdmrz(&vdmr);
__builtin_mma_dmmr(&vdmr, (__dmr1024*)vpp);
__builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp);
__builtin_mma_build_dmr(&vdmr, vc, vc, vc, vc, vc, vc, vc, vc);
__builtin_mma_disassemble_dmr(vdmrp, &vdmr);

// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
Expand All @@ -26,4 +28,6 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
// CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions
// CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions
// CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
// CHECK: error: '__builtin_mma_build_dmr' needs target feature mma,isa-future-instructions
// CHECK: error: '__builtin_mma_disassemble_dmr' needs target feature mma,isa-future-instructions
}
10 changes: 10 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsPowerPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -1701,6 +1701,16 @@ let TargetPrefix = "ppc" in {
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
llvm_i32_ty], [IntrNoMem]>;

// llvm.ppc.mma.disassemble.dmr: takes a pointer and a <1024 x i1> value and
// returns nothing. Declared as only writing memory, and only memory reachable
// through its pointer argument.
def int_ppc_mma_disassemble_dmr :
DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_v1024i1_ty],
[IntrWriteMem, IntrArgMemOnly]>;

// llvm.ppc.mma.build.dmr: takes eight <16 x i8> vectors and returns a
// <1024 x i1> value. Declared as not accessing memory.
def int_ppc_mma_build_dmr :
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;

// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
defm int_ppc_mma_xvi4ger8 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
Expand Down
40 changes: 40 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11292,6 +11292,24 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMergeValues(RetOps, dl);
}

case Intrinsic::ppc_mma_build_dmr: {
SmallVector<SDValue, 8> Pairs;
SmallVector<SDValue, 8> Chains;
for (int i = 1; i < 9; i += 2) {
SDValue Hi = Op.getOperand(i);
SDValue Lo = Op.getOperand(i + 1);
if (Hi->getOpcode() == ISD::LOAD)
Chains.push_back(Hi.getValue(1));
if (Lo->getOpcode() == ISD::LOAD)
Chains.push_back(Lo.getValue(1));
Pairs.push_back(
DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
return DAG.getMergeValues({Value, TF}, dl);
}

case Intrinsic::ppc_mma_dmxxextfdmr512: {
assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
Expand Down Expand Up @@ -11628,6 +11646,10 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(0)),
0);
}
case Intrinsic::ppc_mma_disassemble_dmr: {
return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
Op.getOperand(ArgStart + 1), MachinePointerInfo());
}
default:
break;
}
Expand Down Expand Up @@ -12117,6 +12139,24 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
return DAG.getMergeValues({DmrPValue, TF}, dl);
}

/// Assemble a v1024i1 value from four vector-pair operands.
///
/// Two v512i1 halves are created: DMXXINSTDMR512 from Pairs[0] and Pairs[1],
/// and DMXXINSTDMR512_HI from Pairs[2] and Pairs[3]. They are then combined
/// with a REG_SEQUENCE over the DMRRC register class, placing the first half
/// in sub_wacc_lo and the second in sub_wacc_hi.
///
/// \param Pairs  Operands for the two insert nodes; only the first four
///               entries are read, so at least four must be supplied.
/// \param dl     Debug location attached to every node created here.
/// \param DAG    Selection DAG in which the nodes are created.
/// \returns Result 0 of the REG_SEQUENCE machine node (type v1024i1).
SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Pairs[0..3] are indexed unconditionally below; catch short inputs in
  // asserts-enabled builds instead of reading out of bounds.
  assert(Pairs.size() >= 4 &&
         "DMFInsert1024 requires at least four vector pairs");

  // Low 512 bits of the DMR.
  SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
                                Pairs[1]),
             0);
  SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);

  // High 512 bits of the DMR.
  SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
                                Pairs[2], Pairs[3]),
             0);
  SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);

  // REG_SEQUENCE operands: register class id, then (value, subreg index)
  // pairs for each half.
  SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);

  return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
                                    {RC, Lo, LoSub, Hi, HiSub}),
                 0);
}

SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,8 @@ namespace llvm {
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
/// Build a v1024i1 value from the first four entries of \p Pairs, creating
/// the required nodes in \p DAG at location \p dl.
SDValue DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
const SDLoc &dl, SelectionDAG &DAG) const;

SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/PowerPC/dmr-enable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,69 @@ entry:
ret void
}

; Codegen test for llvm.ppc.mma.build.dmr and llvm.ppc.mma.disassemble.dmr.
; The body builds a DMR from eight copies of the vector loaded from %v and
; stores it to %res2, then passes the <1024 x i1> loaded from %p1 to
; disassemble.dmr with %res1 as the destination. %p2 is not used.
define void @tbuild(ptr %p1, ptr %p2, ptr %res1, ptr %res2, ptr %v) {
; CHECK-LABEL: tbuild:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v3, 0(r7)
; CHECK-NEXT: vmr v2, v3
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r6)
; CHECK-NEXT: stxvp vsp36, 64(r6)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT: stxvp vsp34, 32(r6)
; CHECK-NEXT: stxvp vsp36, 0(r6)
; CHECK-NEXT: lxvp vsp34, 0(r3)
; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-NEXT: lxvp vsp34, 64(r3)
; CHECK-NEXT: lxvp vsp36, 96(r3)
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r5)
; CHECK-NEXT: stxvp vsp36, 64(r5)
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-NEXT: stxvp vsp34, 32(r5)
; CHECK-NEXT: stxvp vsp36, 0(r5)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: tbuild:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv v3, 0(r7)
; CHECK-BE-NEXT: vmr v2, v3
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
; CHECK-BE-NEXT: blr
entry:
%0 = load <16 x i8>, ptr %v, align 16
%1 = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0)
store <1024 x i1> %1, ptr %res2, align 128
%2 = load <1024 x i1>, ptr %p1, align 128
tail call void @llvm.ppc.mma.disassemble.dmr(ptr %res1, <1024 x i1> %2)
ret void
}

declare <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare void @llvm.ppc.mma.disassemble.dmr(ptr, <1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
Expand Down