@@ -5790,6 +5790,11 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
57905790 CUDA_CHECK (cudaFree (ptr));
57915791}
57925792
// Set by ggml_init_cublas(): false when cudaGetDeviceCount() fails
// (no usable CUDA driver/devices), true once initialization completes.
static bool g_cublas_loaded = false;

// Query whether the CUDA/cuBLAS backend initialized successfully.
// Callers use this to fall back to CPU paths when no GPU is available.
bool ggml_cublas_loaded(void) {
    return g_cublas_loaded;
}
57935798
57945799void ggml_init_cublas () {
57955800 static bool initialized = false ;
@@ -5803,7 +5808,12 @@ void ggml_init_cublas() {
58035808 CUDA_CHECK (cudaDeviceSynchronize ());
58045809#endif
58055810
5806- CUDA_CHECK (cudaGetDeviceCount (&g_device_count));
5811+ if (cudaGetDeviceCount (&g_device_count) != cudaSuccess) {
5812+ initialized = true ;
5813+ g_cublas_loaded = false ;
5814+ return ;
5815+ }
5816+
58075817 GGML_ASSERT (g_device_count <= GGML_CUDA_MAX_DEVICES);
58085818 int64_t total_vram = 0 ;
58095819#if defined(GGML_CUDA_FORCE_MMQ)
@@ -5851,6 +5861,7 @@ void ggml_init_cublas() {
58515861 // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
58525862
58535863 initialized = true ;
5864+ g_cublas_loaded = true ;
58545865 }
58555866}
58565867
@@ -7158,6 +7169,8 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
71587169}
71597170
71607171bool ggml_cuda_can_mul_mat (const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
7172+ if (!g_cublas_loaded) return false ;
7173+
71617174 const int64_t ne10 = src1->ne [0 ];
71627175
71637176 const int64_t ne0 = dst->ne [0 ];
@@ -7843,6 +7856,8 @@ void ggml_cuda_free_scratch() {
78437856}
78447857
78457858bool ggml_cuda_compute_forward (struct ggml_compute_params * params, struct ggml_tensor * tensor) {
7859+ if (!g_cublas_loaded) return false ;
7860+
78467861 ggml_cuda_func_t func;
78477862 const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
78487863 || (tensor->src [0 ] != nullptr && (tensor->src [0 ]->backend == GGML_BACKEND_GPU || tensor->src [0 ]->backend == GGML_BACKEND_GPU_SPLIT))
0 commit comments