Skip to content

Commit ae95707

Browse files
authored
Use BSR for VNNI kernel (#147)
* Use BSR
* Make sparsity configurable in UT
1 parent d038a72 commit ae95707

File tree

11 files changed

+297
-774
lines changed

11 files changed

+297
-774
lines changed

nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_spmm_vnni.hpp

Lines changed: 11 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -32,16 +32,15 @@ namespace jd {
3232
*/
3333
class jit_spmm_vnni_t : public jit_generator {
3434
public:
35-
explicit jit_spmm_vnni_t(const ssd::flat_param_t& param)
36-
: jit_generator(), param_(param), csrp_(param_.sparse_ptr) {}
35+
explicit jit_spmm_vnni_t(const ssd::flat_param_t& param);
3736
virtual ~jit_spmm_vnni_t() {}
3837

3938
public:
4039
const void* sequence_vals() const { return seq_vals_.data(); }
4140

4241
private:
4342
ssd::flat_param_t param_;
44-
csrp_data_t<int8_t>* csrp_;
43+
bsr_data_t<int8_t>* bsr_;
4544
std::vector<int8_t> seq_vals_;
4645

4746
private:
@@ -54,30 +53,26 @@ class jit_spmm_vnni_t : public jit_generator {
5453
Xbyak::Zmm dst_tile_Vmm(int i, int j); // Reg alloc of DST tile. 2D shape=(TH,TW), stride=(TW,1)
5554
void params_alias(const ssd::flat_param_t& param);
5655
void read_params();
57-
void load_bias(const std::vector<int64_t>& m_indices);
56+
void load_bias(int64_t m_start);
5857
void load_dense(const std::vector<int64_t>& k_indices);
59-
void load_sparse();
58+
void load_sparse(const int8_t* bsr_data, int64_t kp_lo, int64_t kp_hi);
6059
void tile_product(int tile_height, int tile_width);
61-
void handle_dst_buffer_init(int kb_idx, const std::vector<int64_t>& m_indices);
62-
void handle_dst_buffer_epilogue(int kb_idx, const std::vector<int64_t>& m_indices);
60+
void handle_dst_buffer_init(int kb_idx, int64_t m_start);
61+
void handle_dst_buffer_epilogue(int kb_idx, int64_t m_start);
6362
void mul_scale(int i);
6463
void move_out(int i, int j, int row_idx, int bytes = 1);
65-
std::unordered_map<int64_t, std::vector<int64_t>> get_idx_balanced(const std::vector<int64_t>& m_indices,
64+
std::unordered_map<int64_t, std::vector<int64_t>> get_idx_balanced(int64_t m_start,
6665
const std::vector<int64_t>& sparse_indptr,
6766
const std::vector<int64_t>& sparse_indices, int lo,
6867
int hi);
69-
std::unordered_map<int64_t, std::vector<int8_t>> get_val_balanced(const std::vector<int64_t>& m_indices,
68+
std::unordered_map<int64_t, std::vector<int8_t>> get_val_balanced(int64_t m_start,
7069
const std::vector<int64_t>& sparse_indptr,
7170
const std::vector<int64_t>& sparse_indices, int lo,
7271
int hi, const std::vector<int8_t>& sparse_inddata);
73-
void repeat_THx4xTW_matmal(const std::vector<int64_t>& m_indices,
74-
const std::unordered_map<int64_t, std::vector<int64_t>>& k_indices_map,
75-
const std::unordered_map<int64_t, std::vector<int8_t>>& k_inddata_map);
72+
void repeat_THx4xTW_matmal(int64_t imb);
7673
void clear_dst_tile();
77-
void load_intermediate_dst(const std::vector<int64_t>& m_indices);
78-
void store_intermediate_dst(const std::vector<int64_t>& m_indices);
79-
void save_sequence_vals(const std::vector<int64_t>& m_indices,
80-
const std::unordered_map<int64_t, std::vector<int8_t>>& k_inddata_map, int pos1, int pos2);
74+
void load_intermediate_dst(int64_t m_start);
75+
void store_intermediate_dst(int64_t m_start);
8176
void gen_sub_function();
8277

8378
private:

nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/sparse_data.hpp

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -145,23 +145,11 @@ static constexpr int ADJ = 4; // 4 is that "Multiply groups of 4 adjacent pairs
145145

146146
inline int align_nnz(const int& a_nnz) { return ceil_div(a_nnz, ADJ) * ADJ; }
147147

148-
template <typename T>
149-
sparse_data_t<T>* reorder_to(int rows, int cols, const void* uncoded_ptr, const format_type& dst_encode_fmt);
150-
151148
template <typename T, dim_t group>
152149
std::vector<bsr_data_t<T>*>* reorder_to_bsr_amx(dim_t rows, dim_t cols, dim_t micro_rows, const void* uncoded_ptr);
153150

154151
template <typename T>
155-
uint64_t get_uncoded_nnz(int rows, int cols, const T* uncoded_data, int line_idx = -1);
156-
157-
template <typename T>
158-
sparse_data_t<T> tocsr(int rows, int cols, const T* uncoded_data);
159-
160-
template <typename T>
161-
std::pair<std::vector<int64_t>, std::vector<int64_t>> csr_with_permute(int rows, int cols, const T* uncoded_data);
162-
163-
template <typename T>
164-
bsr_data_t<T> tobsr(dim_t rows, dim_t cols, dim_t blocksize[2], const T* uncoded_data);
152+
bsr_data_t<T> tobsr(dim_t rows, dim_t cols, dim_t blk_row, dim_t blk_col, const T* uncoded_data);
165153

166154
template <typename T, dim_t group>
167155
bsr_data_t<T> to_bsr_amx(dim_t rows, dim_t cols, dim_t blk_row, dim_t blk_col, const T* uncoded_data);

nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_types.hpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,8 @@ template <typename T>
2525
class csrp_data_t;
2626
template <typename T>
2727
class bsc_data_t;
28+
template <typename T>
29+
class bsr_data_t;
2830
namespace ssd {
2931
/**
3032
* @brief tensors index configuration of this kernel.
@@ -56,15 +58,15 @@ struct flat_param_t {
5658
bool append_sum;
5759
data_type output_type;
5860
sparse_scheme scheme;
59-
// optimization config of JIT machine code.
6061
std::vector<int64_t> mkn_blocks;
61-
std::vector<int64_t> tile_shape;
62+
std::vector<int64_t> tile_shape; // 2d vector for microkernel shape in terms of zmm registers
6263
bool sub_func;
63-
int64_t start;
64-
int64_t end;
64+
int64_t im_start; // start m-idx of dest to be calculated
65+
int64_t im_end; // end m-idx of dest to be calculated
66+
int64_t in_start; // start n-idx of dest to be calculated
67+
int64_t in_end; // end n-idx of dest to be calculated
6568
// sparse weight related
66-
csrp_data_t<int8_t>* sparse_ptr;
67-
std::vector<int64_t> avg_group;
69+
bsr_data_t<int8_t>* sparse_ptr;
6870
};
6971

7072
/**
@@ -76,8 +78,6 @@ struct flat_data_t {
7678
const void* ptr_bias; // bias(M, 1).
7779
void* ptr_dst; // dst(M, N).
7880
const void* ptr_scales;
79-
int64_t start;
80-
int64_t end;
8181
};
8282

8383
/**

nlp_toolkit/backends/neural_engine/SparseLib/include/utils.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,8 @@ T str_to_num(const std::string& s);
6363
template <typename T>
6464
std::vector<T> split_str(const std::string& s, const char& delim = ',');
6565

66+
std::string join_str(const std::vector<std::string>& ss, const std::string& delim = ",");
67+
6668
bool init_amx();
6769

6870
/**

0 commit comments

Comments
 (0)