From 349cad1e4d055769ec53f11946a454ba8f77be3d Mon Sep 17 00:00:00 2001 From: catswe <212922539+catswe@users.noreply.github.com> Date: Sun, 5 Oct 2025 12:47:23 -0500 Subject: [PATCH] Update semi_structured_sparse.py In my environment (A100 GPU), I only see a speedup when the following line is enabled: SparseSemiStructuredTensor._FORCE_CUTLASS = True --- advanced_source/semi_structured_sparse.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/advanced_source/semi_structured_sparse.py b/advanced_source/semi_structured_sparse.py index e4bca79b9a..be563db384 100644 --- a/advanced_source/semi_structured_sparse.py +++ b/advanced_source/semi_structured_sparse.py @@ -55,6 +55,9 @@ from torch.sparse import to_sparse_semi_structured, SparseSemiStructuredTensor from torch.utils.benchmark import Timer +# the following line may need to be enabled to see a speedup +# SparseSemiStructuredTensor._FORCE_CUTLASS = True + # mask Linear weight to be 2:4 sparse mask = torch.Tensor([0, 0, 1, 1]).tile((3072, 2560)).cuda().bool() linear = torch.nn.Linear(10240, 3072).half().cuda().eval()