@@ -844,6 +844,58 @@ struct CUDADeviceTy : public GenericDeviceTy {
844844 void *DstPtr, int64_t Size,
845845 AsyncInfoWrapperTy &AsyncInfoWrapper) override ;
846846
847+ Error dataFillImpl (void *TgtPtr, const void *PatternPtr, int64_t PatternSize,
848+ int64_t Size,
849+ AsyncInfoWrapperTy &AsyncInfoWrapper) override {
850+ if (auto Err = setContext ())
851+ return Err;
852+
853+ CUstream Stream;
854+ if (auto Err = getStream (AsyncInfoWrapper, Stream))
855+ return Err;
856+
857+ CUresult Res;
858+ size_t N = Size / PatternSize;
859+ if (PatternSize == 1 ) {
860+ Res = cuMemsetD8Async ((CUdeviceptr)TgtPtr, *((const uint8_t *)PatternPtr),
861+ N, Stream);
862+ } else if (PatternSize == 2 ) {
863+ Res = cuMemsetD16Async ((CUdeviceptr)TgtPtr,
864+ *((const uint16_t *)PatternPtr), N, Stream);
865+ } else if (PatternSize == 4 ) {
866+ Res = cuMemsetD32Async ((CUdeviceptr)TgtPtr,
867+ *((const uint32_t *)PatternPtr), N, Stream);
868+ } else {
869+ // For larger patterns we can do a series of strided fills to copy the
870+ // pattern efficiently
871+ int64_t MemsetSize = PatternSize % 4u == 0u ? 4u
872+ : PatternSize % 2u == 0u ? 2u
873+ : 1u ;
874+
875+ int64_t NumberOfSteps = PatternSize / MemsetSize;
876+ int64_t Pitch = NumberOfSteps * MemsetSize;
877+ int64_t Height = Size / PatternSize;
878+
879+ for (auto Step = 0u ; Step < NumberOfSteps; ++Step) {
880+ if (MemsetSize == 4 ) {
881+ Res = cuMemsetD2D32Async (
882+ (CUdeviceptr)TgtPtr + Step * MemsetSize, Pitch,
883+ *((const uint32_t *)PatternPtr + Step), 1u , Height, Stream);
884+ } else if (MemsetSize == 2 ) {
885+ Res = cuMemsetD2D16Async (
886+ (CUdeviceptr)TgtPtr + Step * MemsetSize, Pitch,
887+ *((const uint16_t *)PatternPtr + Step), 1u , Height, Stream);
888+ } else {
889+ Res = cuMemsetD2D8Async ((CUdeviceptr)TgtPtr + Step * MemsetSize,
890+ Pitch, *((const uint8_t *)PatternPtr + Step),
891+ 1u , Height, Stream);
892+ }
893+ }
894+ }
895+
896+ return Plugin::check (Res, " error in cuMemset: %s" );
897+ }
898+
847899 // / Initialize the async info for interoperability purposes.
848900 Error initAsyncInfoImpl (AsyncInfoWrapperTy &AsyncInfoWrapper) override {
849901 if (auto Err = setContext ())
0 commit comments