Skip to content

Commit 5597614

Browse files
committed
32 bit cache (slower)
1 parent bdd1e4d commit 5597614

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,16 @@ layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
99
layout (constant_id = 0) const uint BLOCK_SIZE = 32;
1010
layout (constant_id = 1) const uint NUM_ROWS = 1;
1111

12+
// Element type of the shared blkcache array, storing each block word in a
// 32-bit slot. A 32-bit cache might write faster due to banking.
13+
struct block_q6_K_32stor
14+
{
15+
// Filled by fill_blkcache with uint32_t(...) conversions of the matching
// data_a_packed16[...].blk entries.
// NOTE(review): 104 presumably matches the word count of the packed16 block — confirm.
uint32_t blk[104];
16+
// NOTE(review): presumably the block's half-precision scale; it is not written
// by the visible fill_blkcache code — confirm where it is populated.
float16_t d;
17+
};
18+
1219
shared FLOAT_TYPE tmpsh[NUM_ROWS][BLOCK_SIZE];
1320
shared FLOAT_TYPE sccache[BLOCK_SIZE/16][16];
14-
shared block_q6_K_packed16 blkcache[BLOCK_SIZE/16];
21+
shared block_q6_K_32stor blkcache[BLOCK_SIZE/16];
1522

1623
uint fill_blkcache_its(uint wg_size) {
1724
// subgroup sizes are always a power of 2
@@ -31,7 +38,7 @@ void fill_blkcache(const int num_blocks, const uint ib0, const uint i0, const ui
3138
[[unroll]] for (int l = 0; l < num_blocks; ++l) {
3239
[[unroll]] for (int m = 0; m < fbi; ++m)
3340
// cache full superblock into shared memory with coalesced reads
34-
blkcache[l].blk[tid + m*bc_t] = data_a_packed16[ib0 + i0 + l].blk[tid + m*bc_t];
41+
blkcache[l].blk[tid + m*bc_t] = uint32_t(data_a_packed16[ib0 + i0 + l].blk[tid + m*bc_t]);
3542
}
3643
}
3744
}

0 commit comments

Comments
 (0)