From 676bf38a7401f613aafe368aa16799abc6e8b6da Mon Sep 17 00:00:00 2001 From: Vlado Boza Date: Mon, 11 Aug 2025 11:13:04 +0200 Subject: [PATCH] Fix uint1 x fp16 packing In the hunk under interleave_weight_f16_1b, the nibble selected by mask 0x000F0000 (B_tmp_5) was shifted left by 8, which is the same destination as the 0x000000F0 nibble (B_tmp_2). Shift it left by 4 instead, so that the 0x000F0000, 0x00F00000 and 0x0F000000 nibbles are placed at bit offsets 4, 12 and 20. --- bitblas/ops/lop3_permutate/lop3_permutate_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitblas/ops/lop3_permutate/lop3_permutate_impl.py b/bitblas/ops/lop3_permutate/lop3_permutate_impl.py index c5d240e69..5457d7825 100644 --- a/bitblas/ops/lop3_permutate/lop3_permutate_impl.py +++ b/bitblas/ops/lop3_permutate/lop3_permutate_impl.py @@ -78,7 +78,7 @@ def interleave_weight_f16_1b(A: T.Buffer((N, QK), storage_dtype), B: T.Buffer((N B_tmp_2[v0, v1] = ((B[v0, v1] & T.uint32(0x000000F0)) >> 4) << 8 B_tmp_3[v0, v1] = ((B[v0, v1] & T.uint32(0x00000F00)) >> 8) << 16 B_tmp_4[v0, v1] = ((B[v0, v1] & T.uint32(0x0000F000)) >> 12) << 24 - B_tmp_5[v0, v1] = ((B[v0, v1] & T.uint32(0x000F0000)) >> 16) << 8 + B_tmp_5[v0, v1] = ((B[v0, v1] & T.uint32(0x000F0000)) >> 16) << 4 B_tmp_6[v0, v1] = ((B[v0, v1] & T.uint32(0x00F00000)) >> 20) << 12 B_tmp_7[v0, v1] = ((B[v0, v1] & T.uint32(0x0F000000)) >> 24) << 20 B[v0, v1] = (