We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4734eda, commit c10f9c9 (Copy full SHA for c10f9c9)
torchao/csrc/cuda/sparse_marlin/base.h
@@ -44,8 +44,8 @@ using I4 = Vec<int, 4>;
44
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#matrix-fragments-for-mma-m16n8k16-with-floating-point-type
45
using FragA = Vec<half2, 4>;
46
using FragB = Vec<half2, 2>;
47
-using FragM = Vec<uint, 1>;
+using FragM = Vec<unsigned int, 1>;
48
using FragC = Vec<float, 4>;
49
using FragS = Vec<half2, 1>; // quantization scales
50
51
-} // namespace torchao
+} // namespace torchao
0 commit comments