diff --git a/torchao/csrc/cuda/sparse_marlin/base.h b/torchao/csrc/cuda/sparse_marlin/base.h index bf81fb5d8a..513c53df3a 100644 --- a/torchao/csrc/cuda/sparse_marlin/base.h +++ b/torchao/csrc/cuda/sparse_marlin/base.h @@ -44,8 +44,8 @@ using I4 = Vec; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#matrix-fragments-for-mma-m16n8k16-with-floating-point-type using FragA = Vec; using FragB = Vec; -using FragM = Vec; +using FragM = Vec; using FragC = Vec; using FragS = Vec; // quantization scales -} // namespace torchao \ No newline at end of file +} // namespace torchao