@@ -3641,9 +3641,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
36413641 setOriginForNaryOp (I);
36423642 }
36433643
3644- // Get an MMX-sized vector type.
3645- Type *getMMXVectorTy (unsigned EltSizeInBits) {
3646- const unsigned X86_MMXSizeInBits = 64 ;
3644+ // Get an MMX-sized (64-bit) vector type, or optionally, other sized
3645+ // vectors.
3646+ Type *getMMXVectorTy (unsigned EltSizeInBits,
3647+ unsigned X86_MMXSizeInBits = 64 ) {
36473648 assert (EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
36483649 " Illegal MMX vector element size" );
36493650 return FixedVectorType::get (IntegerType::get (*MS.C , EltSizeInBits),
@@ -3843,20 +3844,109 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
38433844 setOriginForNaryOp (I);
38443845 }
38453846
3846- // Instrument multiply-add intrinsic.
3847- void handleVectorPmaddIntrinsic (IntrinsicInst &I,
3848- unsigned MMXEltSizeInBits = 0 ) {
3849- Type *ResTy =
3850- MMXEltSizeInBits ? getMMXVectorTy (MMXEltSizeInBits * 2 ) : I.getType ();
3847+ // Instrument multiply-add intrinsics.
3848+ //
3849+ // e.g., Two operands:
3850+ // <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
3851+ //
3852+ // Two operands which require an EltSizeInBits override:
3853+ // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
3854+ //
3855+ // Three operands are not implemented yet:
3856+ // <4 x i32> @llvm.x86.avx512.vpdpbusd.128
3857+ // (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
3858+ // (the result of multiply-add'ing %a and %b is accumulated with %s)
3859+ void handleVectorPmaddIntrinsic (IntrinsicInst &I, unsigned ReductionFactor,
3860+ unsigned EltSizeInBits = 0 ) {
38513861 IRBuilder<> IRB (&I);
3852- auto *Shadow0 = getShadow (&I, 0 );
3853- auto *Shadow1 = getShadow (&I, 1 );
3854- Value *S = IRB.CreateOr (Shadow0, Shadow1);
3855- S = IRB.CreateBitCast (S, ResTy);
3856- S = IRB.CreateSExt (IRB.CreateICmpNE (S, Constant::getNullValue (ResTy)),
3857- ResTy);
3858- S = IRB.CreateBitCast (S, getShadowTy (&I));
3859- setShadow (&I, S);
3862+
3863+ [[maybe_unused]] FixedVectorType *ReturnType =
3864+ cast<FixedVectorType>(I.getType ());
3865+ assert (isa<FixedVectorType>(ReturnType));
3866+
3867+ assert (I.arg_size () == 2 );
3868+
3869+ // Vectors A and B, and shadows
3870+ Value *Va = I.getOperand (0 );
3871+ Value *Vb = I.getOperand (1 );
3872+
3873+ Value *Sa = getShadow (&I, 0 );
3874+ Value *Sb = getShadow (&I, 1 );
3875+
3876+ FixedVectorType *ParamType =
3877+ cast<FixedVectorType>(I.getArgOperand (0 )->getType ());
3878+ assert (ParamType == I.getArgOperand (1 )->getType ());
3879+
3880+ assert (ParamType->getPrimitiveSizeInBits () ==
3881+ ReturnType->getPrimitiveSizeInBits ());
3882+
3883+ FixedVectorType *ImplicitReturnType = ReturnType;
3884+ // Step 1: instrument multiplication of corresponding vector elements
3885+ if (EltSizeInBits) {
3886+ ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy (
3887+ EltSizeInBits * 2 , ParamType->getPrimitiveSizeInBits ()));
3888+ ParamType = cast<FixedVectorType>(
3889+ getMMXVectorTy (EltSizeInBits, ParamType->getPrimitiveSizeInBits ()));
3890+
3891+ Va = IRB.CreateBitCast (Va, ParamType);
3892+ Vb = IRB.CreateBitCast (Vb, ParamType);
3893+
3894+ Sa = IRB.CreateBitCast (Sa, getShadowTy (ParamType));
3895+ Sb = IRB.CreateBitCast (Sb, getShadowTy (ParamType));
3896+ } else {
3897+ assert (ParamType->getNumElements () ==
3898+ ReturnType->getNumElements () * ReductionFactor);
3899+ }
3900+
3901+ // Multiplying an *initialized* zero by an uninitialized element results in
3902+ // an initialized zero element.
3903+ //
3904+ // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value
3905+ // results in an unpoisoned value. We can therefore adapt the visitAnd()
3906+ // instrumentation:
3907+ // OutShadow = (SaNonZero & SbNonZero)
3908+ // | (VaNonZero & SbNonZero)
3909+ // | (SaNonZero & VbNonZero)
3910+ // where non-zero is checked on a per-element basis (not per bit).
3911+ Value *SZero = Constant::getNullValue (Va->getType ());
3912+ Value *VZero = Constant::getNullValue (Sa->getType ());
3913+ Value *SaNonZero = IRB.CreateICmpNE (Sa, SZero);
3914+ Value *SbNonZero = IRB.CreateICmpNE (Sb, SZero);
3915+ Value *VaNonZero = IRB.CreateICmpNE (Va, VZero);
3916+ Value *VbNonZero = IRB.CreateICmpNE (Vb, VZero);
3917+
3918+ Value *SaAndSbNonZero = IRB.CreateAnd (SaNonZero, SbNonZero);
3919+ Value *VaAndSbNonZero = IRB.CreateAnd (VaNonZero, SbNonZero);
3920+ Value *SaAndVbNonZero = IRB.CreateAnd (SaNonZero, VbNonZero);
3921+
3922+ // Each element of the vector is represented by a single bit (poisoned or
3923+ // not) e.g., <8 x i1>.
3924+ Value *And = IRB.CreateOr ({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero});
3925+
3926+ // Extend <8 x i1> to <8 x i16>.
3927+ // (The real pmadd intrinsic would have computed intermediate values of
3928+ // <8 x i32>, but that is irrelevant for our shadow purposes because we
3929+ // consider each element to be either fully initialized or fully
3930+ // uninitialized.)
3931+ And = IRB.CreateSExt (And, Sa->getType ());
3932+
3933+ // Step 2: instrument horizontal add
3934+ // We don't need bit-precise horizontalReduce because we only want to check
3935+ // if each pair of elements is fully zero.
3936+ // Cast to <4 x i32>.
3937+ Value *Horizontal = IRB.CreateBitCast (And, ImplicitReturnType);
3938+
3939+ // Compute <4 x i1>, then extend back to <4 x i32>.
3940+ Value *OutShadow = IRB.CreateSExt (
3941+ IRB.CreateICmpNE (Horizontal,
3942+ Constant::getNullValue (Horizontal->getType ())),
3943+ ImplicitReturnType);
3944+
3945+ // For MMX, cast it back to the required fake return type (<1 x i64>).
3946+ if (EltSizeInBits)
3947+ OutShadow = CreateShadowCast (IRB, OutShadow, getShadowTy (&I));
3948+
3949+ setShadow (&I, OutShadow);
38603950 setOriginForNaryOp (I);
38613951 }
38623952
@@ -5391,19 +5481,28 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
53915481 handleVectorSadIntrinsic (I);
53925482 break ;
53935483
5484+ // Multiply and Add Packed Words
5485+ // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
5486+ // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
5487+ //
5488+ // Multiply and Add Packed Signed and Unsigned Bytes
5489+ // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
5490+ // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
53945491 case Intrinsic::x86_sse2_pmadd_wd:
53955492 case Intrinsic::x86_avx2_pmadd_wd:
53965493 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
53975494 case Intrinsic::x86_avx2_pmadd_ub_sw:
5398- handleVectorPmaddIntrinsic (I);
5495+ handleVectorPmaddIntrinsic (I, /* ReductionFactor= */ 2 );
53995496 break ;
54005497
5498+ // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
54015499 case Intrinsic::x86_ssse3_pmadd_ub_sw:
5402- handleVectorPmaddIntrinsic (I, 8 );
5500+ handleVectorPmaddIntrinsic (I, /* ReductionFactor= */ 2 , /* EltSize= */ 8 );
54035501 break ;
54045502
5503+ // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
54055504 case Intrinsic::x86_mmx_pmadd_wd:
5406- handleVectorPmaddIntrinsic (I, 16 );
5505+ handleVectorPmaddIntrinsic (I, /* ReductionFactor= */ 2 , /* EltSize= */ 16 );
54075506 break ;
54085507
54095508 case Intrinsic::x86_sse_cmp_ss:
0 commit comments