@@ -544,6 +544,18 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
544544 return Builder.CreateFMul (X, PiOver180);
545545}
546546
547+ static Value* createCombinedi32toi64Expansion (IRBuilder<> &Builder, Value *LoBytes, Value *HighBytes) {
548+ // For int64, manually combine two int32s
549+ // First, zero-extend both values to i64
550+ Value *Lo = Builder.CreateZExt (LoBytes, Builder.getInt64Ty ());
551+ Value *Hi =
552+ Builder.CreateZExt (HighBytes, Builder.getInt64Ty ());
553+ // Shift the high bits left by 32 bits
554+ Value *ShiftedHi = Builder.CreateShl (Hi, Builder.getInt64 (32 ));
555+ // OR the high and low bits together
556+ return Builder.CreateOr (Lo, ShiftedHi);
557+ }
558+
547559static bool expandTypedBufferLoadIntrinsic (CallInst *Orig) {
548560 IRBuilder<> Builder (Orig);
549561
@@ -579,22 +591,14 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
579591 Value *Result = PoisonValue::get (BufferTy);
580592 for (unsigned I = 0 ; I < ExtractNum; I += 2 ) {
581593 Value *Combined = nullptr ;
582- if (IsDouble) {
594+ if (IsDouble)
583595 // For doubles, use dx_asdouble intrinsic
584596 Combined =
585597 Builder.CreateIntrinsic (Builder.getDoubleTy (), Intrinsic::dx_asdouble,
586598 {ExtractElements[I], ExtractElements[I + 1 ]});
587- } else {
588- // For int64, manually combine two int32s
589- // First, zero-extend both values to i64
590- Value *Lo = Builder.CreateZExt (ExtractElements[I], Builder.getInt64Ty ());
591- Value *Hi =
592- Builder.CreateZExt (ExtractElements[I + 1 ], Builder.getInt64Ty ());
593- // Shift the high bits left by 32 bits
594- Value *ShiftedHi = Builder.CreateShl (Hi, Builder.getInt64 (32 ));
595- // OR the high and low bits together
596- Combined = Builder.CreateOr (Lo, ShiftedHi);
597- }
599+ else
600+ Combined =
601+ createCombinedi32toi64Expansion (Builder, ExtractElements[I], ExtractElements[I + 1 ]);
598602
599603 if (ExtractNum == 4 )
600604 Result = Builder.CreateInsertElement (Result, Combined,
@@ -650,60 +654,42 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
650654 Type *Int32Ty = Builder.getInt32Ty ();
651655 Type *ResultTy = VectorType::get (Int32Ty, IsVector ? 4 : 2 , false );
652656 Value *Val = PoisonValue::get (ResultTy);
653-
657+
658+ // Type of each 32-bit half: i32 for scalars, <2 x i32> for vectors
659+ Type *SplitElementTy = Int32Ty;
660+ if (IsVector)
661+ SplitElementTy = VectorType::get (SplitElementTy, 2 , false );
662+
663+ Value *LowBits = nullptr ;
664+ Value *HighBits = nullptr ;
654665 // Split the 64-bit values into 32-bit components
655666 if (IsDouble) {
656- // Handle double type(s)
657- Type *SplitElementTy = Int32Ty;
658- if (IsVector)
659- SplitElementTy = VectorType::get (SplitElementTy, 2 , false );
660-
661667 auto *SplitTy = llvm::StructType::get (SplitElementTy, SplitElementTy);
662668 Value *Split = Builder.CreateIntrinsic (SplitTy, Intrinsic::dx_splitdouble,
663669 {Orig->getOperand (2 )});
664- Value *LowBits = Builder.CreateExtractValue (Split, 0 );
665- Value *HighBits = Builder.CreateExtractValue (Split, 1 );
666-
667- if (IsVector) {
668- // For vector doubles, use shuffle to create the final vector
669- Val = Builder.CreateShuffleVector (LowBits, HighBits, {0 , 2 , 1 , 3 });
670- } else {
671- // For scalar doubles, insert the elements
672- Val = Builder.CreateInsertElement (Val, LowBits, Builder.getInt32 (0 ));
673- Val = Builder.CreateInsertElement (Val, HighBits, Builder.getInt32 (1 ));
674- }
670+ LowBits = Builder.CreateExtractValue (Split, 0 );
671+ HighBits = Builder.CreateExtractValue (Split, 1 );
675672 } else {
676673 // Handle int64 type(s)
677674 Value *InputVal = Orig->getOperand (2 );
675+ Constant *ShiftAmt = Builder.getInt64 (32 );
676+ if (IsVector)
677+ ShiftAmt = ConstantVector::getSplat (ElementCount::getFixed (2 ), ShiftAmt);
678678
679- if (IsVector) {
680- // Handle vector of int64
681- for (unsigned I = 0 ; I < 2 ; ++I) {
682- // Extract each int64 element
683- Value *Int64Val =
684- Builder.CreateExtractElement (InputVal, Builder.getInt32 (I));
685-
686- // Split into low and high 32-bit parts
687- Value *LowBits = Builder.CreateTrunc (Int64Val, Int32Ty);
688- Value *ShiftedVal = Builder.CreateLShr (Int64Val, Builder.getInt64 (32 ));
689- Value *HighBits = Builder.CreateTrunc (ShiftedVal, Int32Ty);
690-
691- // Insert into result vector
692- Val =
693- Builder.CreateInsertElement (Val, LowBits, Builder.getInt32 (I * 2 ));
694- Val = Builder.CreateInsertElement (Val, HighBits,
695- Builder.getInt32 (I * 2 + 1 ));
696- }
697- } else {
698- // Handle scalar int64
699- Value *LowBits = Builder.CreateTrunc (InputVal, Int32Ty);
700- Value *ShiftedVal = Builder.CreateLShr (InputVal, Builder.getInt64 (32 ));
701- Value *HighBits = Builder.CreateTrunc (ShiftedVal, Int32Ty);
679+ // Split into low and high 32-bit parts
680+ LowBits = Builder.CreateTrunc (InputVal, SplitElementTy);
681+ Value *ShiftedVal = Builder.CreateLShr (InputVal, ShiftAmt);
682+ HighBits = Builder.CreateTrunc (ShiftedVal, SplitElementTy);
683+ }
702684
685+ if (IsVector) {
686+ // For vector inputs, use shuffle to interleave the low and high halves
687+ Val = Builder.CreateShuffleVector (LowBits, HighBits, {0 , 2 , 1 , 3 });
688+ } else {
689+ // For scalar inputs, insert the low and high halves as elements 0 and 1
703690 Val = Builder.CreateInsertElement (Val, LowBits, Builder.getInt32 (0 ));
704691 Val = Builder.CreateInsertElement (Val, HighBits, Builder.getInt32 (1 ));
705692 }
706- }
707693
708694 // Create the final intrinsic call
709695 Builder.CreateIntrinsic (Builder.getVoidTy (),
0 commit comments