@@ -201,63 +201,43 @@ static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
201201
202202static_assert (sizeof (half) == sizeof (ggml_fp16_t ), " wrong fp16 size" );
203203
204- #define CUDA_CHECK (err ) \
205- do { \
206- cudaError_t err_ = (err); \
207- if (err_ != cudaSuccess) { \
208- int id; \
209- cudaGetDevice (&id); \
210- fprintf (stderr, " \n CUDA error %d at %s:%d: %s\n " , err_, __FILE__, __LINE__, \
211- cudaGetErrorString (err_)); \
212- fprintf (stderr, " current device: %d\n " , id); \
213- GGML_ASSERT (!" CUDA error" ); \
214- } \
215- } while (0 )
216-
217- // driver API
218- #define CU_CHECK (err ) \
219- do { \
220- CUresult err_ = (err); \
221- if (err_ != CUDA_SUCCESS) { \
222- int id; \
223- cuDeviceGet (&id, 0 ); \
224- const char * err_str; \
225- cuGetErrorString (err_, &err_str); \
226- fprintf (stderr, " \n CUDA error %d at %s:%d: %s\n " , err_, __FILE__, __LINE__, \
227- err_str); \
228- fprintf (stderr, " %s\n " , #err); \
229- fprintf (stderr, " current device: %d\n " , id); \
230- GGML_ASSERT (!" CUDA error" ); \
231- } \
232- } while (0 )
233-
234-
235204#if CUDART_VERSION >= 12000
236- #define CUBLAS_CHECK (err ) \
237- do { \
238- cublasStatus_t err_ = (err); \
239- if (err_ != CUBLAS_STATUS_SUCCESS) { \
240- int id; \
241- cudaGetDevice (&id); \
242- fprintf (stderr, " \n cuBLAS error %d at %s:%d: %s\n " , \
243- err_, __FILE__, __LINE__, cublasGetStatusString (err_)); \
244- fprintf (stderr, " current device: %d\n " , id); \
245- GGML_ASSERT (!" cuBLAS error" ); \
246- } \
247- } while (0 )
205+ static const char * cublas_get_error_str (const cublasStatus_t err) {
206+ return cublasGetStatusString (err);
207+ }
248208#else
249- #define CUBLAS_CHECK (err ) \
250- do { \
251- cublasStatus_t err_ = (err); \
252- if (err_ != CUBLAS_STATUS_SUCCESS) { \
253- int id; \
254- cudaGetDevice (&id); \
255- fprintf (stderr, " \n cuBLAS error %d at %s:%d\n " , err_, __FILE__, __LINE__); \
256- fprintf (stderr, " current device: %d\n " , id); \
257- GGML_ASSERT (!" cuBLAS error" ); \
258- } \
259- } while (0 )
260- #endif // CUDART_VERSION >= 11
209+ static const char * cublas_get_error_str (const cublasStatus_t err) {
210+ switch (err) {
211+ case CUBLAS_STATUS_SUCCESS: return " CUBLAS_STATUS_SUCCESS" ;
212+ case CUBLAS_STATUS_NOT_INITIALIZED: return " CUBLAS_STATUS_NOT_INITIALIZED" ;
213+ case CUBLAS_STATUS_ALLOC_FAILED: return " CUBLAS_STATUS_ALLOC_FAILED" ;
214+ case CUBLAS_STATUS_INVALID_VALUE: return " CUBLAS_STATUS_INVALID_VALUE" ;
215+ case CUBLAS_STATUS_ARCH_MISMATCH: return " CUBLAS_STATUS_ARCH_MISMATCH" ;
216+ case CUBLAS_STATUS_MAPPING_ERROR: return " CUBLAS_STATUS_MAPPING_ERROR" ;
217+ case CUBLAS_STATUS_EXECUTION_FAILED: return " CUBLAS_STATUS_EXECUTION_FAILED" ;
218+ case CUBLAS_STATUS_INTERNAL_ERROR: return " CUBLAS_STATUS_INTERNAL_ERROR" ;
219+ case CUBLAS_STATUS_NOT_SUPPORTED: return " CUBLAS_STATUS_NOT_SUPPORTED" ;
220+ case CUBLAS_STATUS_LICENSE_ERROR: return " CUBLAS_STATUS_LICENSE_ERROR" ;
221+ default : return " unknown error" ;
222+ }
223+ #endif // CUDART_VERSION >= 12000
224+
225+ static const char * cu_get_error_str (CUresult err) {
226+ const char * err_str;
227+ cuGetErrorString (err, &err_str);
228+ return err_str;
229+ }
230+
231+ [[noreturn]]
232+ static void ggml_cuda_error (const char * stmt, const char * func, const char * file, const int line, const char * msg) {
233+ fprintf (stderr, " CUDA error: %s: %s\n " , stmt, msg);
234+ fprintf (stderr, " in function %s at %s:%d\n " , func, file, line);
235+ GGML_ASSERT (!" CUDA error" );
236+ }
237+
238+ #define CUDA_CHECK (err ) do { auto err_ = (err); if (err_ != cudaSuccess) ggml_cuda_error (#err, __func__, __FILE__, __LINE__, cudaGetErrorString (err_)); } while (0 )
239+ #define CUBLAS_CHECK (err ) do { auto err_ = (err); if (err_ != CUBLAS_STATUS_SUCCESS) ggml_cuda_error (#err, __func__, __FILE__, __LINE__, cublas_get_error_str (err_)); } while (0 )
240+ #define CU_CHECK (err ) do { auto err_ = (err); if (err_ != CUDA_SUCCESS) ggml_cuda_error (#err, __func__, __FILE__, __LINE__, cu_get_error_str (err_)); } while (0 )
261241
262242#if CUDART_VERSION >= 11100
263243#define GGML_CUDA_ASSUME (x ) __builtin_assume(x)
@@ -537,13 +517,13 @@ static int g_device_count = -1;
537517static int g_main_device = 0 ;
538518static float g_tensor_split[GGML_CUDA_MAX_DEVICES] = {0 };
539519
540- struct device_capabilities {
520+ struct cuda_device_capabilities {
541521 int cc; // compute capability
542522 bool vmm; // virtual memory support
543523 size_t vmm_granularity; // granularity of virtual memory
544524};
545525
546- static device_capabilities g_device_caps[GGML_CUDA_MAX_DEVICES] = { {0 , false , 0 } };
526+ static cuda_device_capabilities g_device_caps[GGML_CUDA_MAX_DEVICES] = { {0 , false , 0 } };
547527
548528
549529static void * g_scratch_buffer = nullptr ;
0 commit comments