From 38d8881a6344a5e13ac669673a6af363b0bf5006 Mon Sep 17 00:00:00 2001 From: Tobias van der Werff <33268192+tobiasvanderwerff@users.noreply.github.com> Date: Wed, 16 Oct 2024 20:41:40 +0000 Subject: [PATCH] Fix 20x slowdown of FP6 kernel due to device properties query (#1092) Replace `cudaGetDeviceProperties` with `cudaDeviceGetAttribute` --- torchao/csrc/cuda/fp6_llm/fp6_linear.cu | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/torchao/csrc/cuda/fp6_llm/fp6_linear.cu b/torchao/csrc/cuda/fp6_llm/fp6_linear.cu index b4cbe99160..978925a3f7 100644 --- a/torchao/csrc/cuda/fp6_llm/fp6_linear.cu +++ b/torchao/csrc/cuda/fp6_llm/fp6_linear.cu @@ -29,17 +29,16 @@ inline bool isSM75GPU() { int device; cudaError_t err = cudaGetDevice(&device); - if (err != cudaSuccess) { - return false; - } + if (err != cudaSuccess) return false; - cudaDeviceProp props; - err = cudaGetDeviceProperties(&props, device); - if (err != cudaSuccess) { - return false; - } + int major, minor; + err = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device); + if (err != cudaSuccess) return false; + + err = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device); + if (err != cudaSuccess) return false; - return (props.major == 7) && (props.minor == 5); + return (major == 7) && (minor == 5); } template