From 349cad1e4d055769ec53f11946a454ba8f77be3d Mon Sep 17 00:00:00 2001
From: catswe <212922539+catswe@users.noreply.github.com>
Date: Sun, 5 Oct 2025 12:47:23 -0500
Subject: [PATCH] Update semi_structured_sparse.py

In my environment (A100 GPU), I only see a speedup when the following line is enabled:

SparseSemiStructuredTensor._FORCE_CUTLASS = True
---
 advanced_source/semi_structured_sparse.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/advanced_source/semi_structured_sparse.py b/advanced_source/semi_structured_sparse.py
index e4bca79b9a..be563db384 100644
--- a/advanced_source/semi_structured_sparse.py
+++ b/advanced_source/semi_structured_sparse.py
@@ -55,6 +55,9 @@
 from torch.sparse import to_sparse_semi_structured, SparseSemiStructuredTensor
 from torch.utils.benchmark import Timer
 
+# If you do not see a speedup, try uncommenting the next line to force the CUTLASS backend
+# SparseSemiStructuredTensor._FORCE_CUTLASS = True
+
 # mask Linear weight to be 2:4 sparse
 mask = torch.Tensor([0, 0, 1, 1]).tile((3072, 2560)).cuda().bool()
 linear = torch.nn.Linear(10240, 3072).half().cuda().eval()