Skip to content

Commit 77836c3

Browse files
CISC, yael-works
authored and committed
cuda : add missing F32<->I32 entries in ggml_cuda_cpy_fn (ggml-org#16060)
1 parent 490528d commit 77836c3

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

ggml/src/ggml-cuda/cpy.cu

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,10 @@ void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1) {
441441
return (void*) cpy_flt<cpy_1_flt<nv_bfloat16, nv_bfloat16>>;
442442
} else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_F32) {
443443
return (void*) cpy_flt<cpy_1_flt<nv_bfloat16, float>>;
444+
} else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_I32) {
445+
return (void*) cpy_flt<cpy_1_flt<float, int32_t>>;
446+
} else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_F32) {
447+
return (void*) cpy_flt<cpy_1_flt<int32_t, float>>;
444448
} else {
445449
GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
446450
ggml_type_name(src0->type), ggml_type_name(src1->type));

0 commit comments

Comments
 (0)