@@ -648,6 +648,13 @@ typedef struct {
648648} block_q8_0 ;
649649static_assert (sizeof (block_q8_0 ) == sizeof (float ) + QK8_0 , "wrong q8_0 block size/padding" );
650650
651+ #define QK4_0C (4*32)
652+ #define QK4_0C_MUL (QK4_0C / QK4_0)
653+ // TODO: nicer description - pseudostruct?
654+ // q4_0c : (uint8_t[QK4_0C/2]) qs[nsb] || float d[nb]   (row of k values: nsb = k/QK4_0C, nb = k/QK4_0)
655+
656+ #define QK8_0C 32
657+ // q8_0c : int8_t qs[k] || float d[nb]   (row of k values: nb = k/QK8_0C)
651658
652659// reference implementation for deterministic creation of model files
653660static void quantize_row_q4_0_reference (const float * restrict x , block_q4_0 * restrict y , int k ) {
@@ -937,6 +944,57 @@ static void quantize_row_q4_0(const float * restrict x, void * restrict vy, int
937944#endif
938945}
939946
947+ static void quantize_row_q4_0c_reference (const float * restrict x , uint8_t * restrict y , int k ) {
948+ assert (k % QK4_0C == 0 );
949+ const int nb = k / QK4_0 ;
950+ const int nsb = k / QK4_0C ;
951+
952+ // Split y into nibbles section and scales section
953+ uint8_t * restrict qs = y ;
954+ float * restrict ds = (float * ) (y + QK4_0C /2 * nsb );
955+
956+ for (int i = 0 ; i < nb /2 ; i ++ ) {
957+ // Interleave two output blocks in low and high nibbles
958+ const int src0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
959+ const int src1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
960+ const float * xb [2 ] = {
961+ x + QK4_0 * src0 , // block in low nibbles
962+ x + QK4_0 * src1 , // block in high nibbles
963+ };
964+
965+ // Find multiplier for each block
966+ float d [2 ];
967+ float id [2 ];
968+ for (int j = 0 ; j < 2 ; j ++ ) {
969+ float amax = 0.0f ; // absolute max
970+
971+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
972+ const float v = xb [j ][l ];
973+ amax = MAX (amax , fabsf (v ));
974+ }
975+
976+ d [j ] = amax / ((1 << 3 ) - 1 );
977+ id [j ] = d [j ] ? 1.0f /d [j ] : 0.0f ;
978+ }
979+
980+ ds [src0 ] = d [0 ];
981+ ds [src1 ] = d [1 ];
982+
983+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
984+ const float v0 = xb [0 ][l ]* id [0 ];
985+ const uint8_t vi0 = (int8_t )roundf (v0 ) + 8 ;
986+
987+ const float v1 = xb [1 ][l ]* id [1 ];
988+ const uint8_t vi1 = (int8_t )roundf (v1 ) + 8 ;
989+
990+ assert (vi0 < 16 );
991+ assert (vi1 < 16 );
992+
993+ qs [i * QK4_0 + l ] = vi0 | (vi1 << 4 );
994+ }
995+ }
996+ }
997+
940998static void quantize_row_q4_1_reference (const float * restrict x , void * restrict vy , int k ) {
941999 assert (k % QK4_1 == 0 );
9421000 const int nb = k / QK4_1 ;
@@ -1377,6 +1435,40 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int
13771435#endif
13781436}
13791437
1438+ // reference implementation for deterministic creation of model files
1439+ static void quantize_row_q8_0c_reference (const float * restrict x , void * restrict y , int k ) {
1440+ assert (k % QK8_0 == 0 );
1441+ const int nb = k / QK8_0 ;
1442+
1443+ uint8_t * restrict qs = y ;
1444+ float * restrict ds = (float * ) ((uint8_t * ) y + QK8_0C * nb );
1445+
1446+ for (int i = 0 ; i < nb ; i ++ ) {
1447+ float amax = 0.0f ; // absolute max
1448+
1449+ for (int l = 0 ; l < QK8_0 ; l ++ ) {
1450+ const float v = x [i * QK8_0 + l ];
1451+ amax = MAX (amax , fabsf (v ));
1452+ }
1453+
1454+ const float d = amax / ((1 << 7 ) - 1 );
1455+ const float id = d ? 1.0f /d : 0.0f ;
1456+
1457+ ds [i ] = d ;
1458+
1459+ for (int l = 0 ; l < QK8_0 ; ++ l ) {
1460+ const float v = x [i * QK8_0 + l ]* id ;
1461+ qs [i * QK8_0 + l ] = roundf (v );
1462+ }
1463+ }
1464+ }
1465+
1466+ static void quantize_row_q8_0c (const float * restrict x , void * restrict vy , int k ) {
1467+ assert (k % QK8_0 == 0 );
1468+
1469+ quantize_row_q8_0c_reference (x , vy , k );
1470+ }
1471+
13801472static void dequantize_row_q4_0 (const void * restrict vx , float * restrict y , int k ) {
13811473 assert (k % QK4_0 == 0 );
13821474 const int nb = k / QK4_0 ;
@@ -1495,6 +1587,41 @@ static void dequantize_row_q4_0(const void * restrict vx, float * restrict y, in
14951587#endif
14961588}
14971589
1590+ static void dequantize_row_q4_0c (const void * restrict vx , float * restrict y , int k ) {
1591+ assert (k % QK4_0C == 0 );
1592+ const int nb = k / QK4_0 ;
1593+ const int nsb = k / QK4_0C ;
1594+
1595+ // Split vx into nibbles section and scales section
1596+ const uint8_t * restrict qs = vx ;
1597+ const float * restrict ds = (const float * ) ((const uint8_t * ) vx + QK4_0C /2 * nsb );
1598+
1599+ // scalar
1600+ for (int i = 0 ; i < nb /2 ; i ++ ) {
1601+ const int dst0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
1602+ const int dst1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
1603+
1604+ const float d0 = ds [dst0 ];
1605+ const float d1 = ds [dst1 ];
1606+
1607+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
1608+ const uint8_t vi = qs [i * QK4_0 + l ];
1609+
1610+ const int8_t vi0 = vi & 0xf ;
1611+ const int8_t vi1 = vi >> 4 ;
1612+
1613+ const float v0 = (vi0 - 8 )* d0 ;
1614+ const float v1 = (vi1 - 8 )* d1 ;
1615+
1616+ y [dst0 * QK4_0 + l ] = v0 ;
1617+ y [dst1 * QK4_0 + l ] = v1 ;
1618+
1619+ assert (!isnan (y [dst0 * QK4_0 + l ]));
1620+ assert (!isnan (y [dst1 * QK4_0 + l ]));
1621+ }
1622+ }
1623+ }
1624+
14981625static void dequantize_row_q4_1 (const void * restrict vx , float * restrict y , int k ) {
14991626 assert (k % QK4_1 == 0 );
15001627 const int nb = k / QK4_1 ;
@@ -1631,6 +1758,7 @@ static void dequantize_row_q4_2(const void * restrict vx, float * restrict y, in
16311758}
16321759
16331760static void ggml_vec_dot_q4_0_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1761+ static void ggml_vec_dot_q4_0c_q8_0c (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
16341762static void ggml_vec_dot_q4_1_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
16351763static void ggml_vec_dot_q4_2_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
16361764
@@ -1642,6 +1770,14 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
16421770 .quantize_row_q_dot = quantize_row_q8_0 ,
16431771 .vec_dot_q = ggml_vec_dot_q4_0_q8_0 ,
16441772 },
1773+ [GGML_TYPE_Q4_0C ] = {
1774+ .dequantize_row_q = dequantize_row_q4_0c ,
1775+ //.quantize_row_q = quantize_row_q4_0c,
1776+ .quantize_row_q = (quantize_row_q_t ) quantize_row_q4_0c_reference ,
1777+ .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_0c_reference ,
1778+ .quantize_row_q_dot = quantize_row_q8_0c ,
1779+ .vec_dot_q = ggml_vec_dot_q4_0c_q8_0c ,
1780+ },
16451781 [GGML_TYPE_Q4_1 ] = {
16461782 .dequantize_row_q = dequantize_row_q4_1 ,
16471783 .quantize_row_q = quantize_row_q4_1 ,
@@ -1663,6 +1799,13 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
16631799 .quantize_row_q_dot = quantize_row_q8_0 ,
16641800 .vec_dot_q = NULL , // TODO
16651801 },
1802+ [GGML_TYPE_Q8_0C ] = {
1803+ .dequantize_row_q = NULL ,
1804+ .quantize_row_q = quantize_row_q8_0c ,
1805+ .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q8_0c_reference ,
1806+ .quantize_row_q_dot = quantize_row_q8_0c ,
1807+ .vec_dot_q = NULL ,
1808+ },
16661809};
16671810
16681811// For internal test use
@@ -2460,6 +2603,51 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
24602603 * s = sumf ;
24612604}
24622605
2606+ static void ggml_vec_dot_q4_0c_q8_0c (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ) {
2607+ const int nb = n / QK4_0 ;
2608+ const int nsb = n / QK4_0C ;
2609+
2610+ assert (n % QK4_0C == 0 );
2611+
2612+ // Split into nibbles and scales sections
2613+ const uint8_t * restrict xqs = vx ;
2614+ const float * restrict xds = (const float * ) ((const uint8_t * ) vx + nsb * QK4_0C /2 );
2615+ const int8_t * restrict yqs = vy ;
2616+ const float * restrict yds = (const float * ) ((const uint8_t * ) vy + nb * QK8_0C );
2617+
2618+ float sumf = 0.0 ;
2619+
2620+ // scalar
2621+ for (int i = 0 ; i < nb /2 ; i ++ ) {
2622+ const int dst0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
2623+ const int dst1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
2624+
2625+ const float dx0 = xds [dst0 ];
2626+ const float dx1 = xds [dst1 ];
2627+ const float dy0 = yds [dst0 ];
2628+ const float dy1 = yds [dst1 ];
2629+
2630+ int sumi0 = 0 ;
2631+ int sumi1 = 0 ;
2632+
2633+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
2634+ const uint8_t v0 = xqs [i * QK4_0 + l ];
2635+
2636+ const int i0 = (int8_t ) (v0 & 0xf ) - 8 ;
2637+ const int i1 = (int8_t ) (v0 >> 4 ) - 8 ;
2638+
2639+ const int i2 = yqs [dst0 * QK4_0 + l ];
2640+ const int i3 = yqs [dst1 * QK4_0 + l ];
2641+
2642+ sumi0 += i0 * i2 ;
2643+ sumi1 += i1 * i3 ;
2644+ }
2645+ sumf += dx0 * dy0 * sumi0 + dx1 * dy1 * sumi1 ;
2646+ }
2647+
2648+ * s = sumf ;
2649+ }
2650+
24632651static void ggml_vec_dot_q4_1_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ) {
24642652 const int nb = n / QK8_0 ;
24652653
@@ -3004,54 +3192,62 @@ static const int GGML_BLCK_SIZE[GGML_TYPE_COUNT] = {
30043192 [GGML_TYPE_F32 ] = 1 ,
30053193 [GGML_TYPE_F16 ] = 1 ,
30063194 [GGML_TYPE_Q4_0 ] = QK4_0 ,
3195+ [GGML_TYPE_Q4_0C ] = QK4_0C ,
30073196 [GGML_TYPE_Q4_1 ] = QK4_1 ,
30083197 [GGML_TYPE_Q4_2 ] = QK4_2 ,
30093198 [GGML_TYPE_Q8_0 ] = QK8_0 ,
3199+ [GGML_TYPE_Q8_0C ] = QK8_0C ,
30103200 [GGML_TYPE_I8 ] = 1 ,
30113201 [GGML_TYPE_I16 ] = 1 ,
30123202 [GGML_TYPE_I32 ] = 1 ,
30133203};
3014- static_assert (GGML_TYPE_COUNT == 9 , "GGML_BLCK_SIZE is outdated" );
3204+ static_assert (GGML_TYPE_COUNT == 11 , "GGML_BLCK_SIZE is outdated" );
30153205
30163206static const size_t GGML_TYPE_SIZE [GGML_TYPE_COUNT ] = {
30173207 [GGML_TYPE_F32 ] = sizeof (float ),
30183208 [GGML_TYPE_F16 ] = sizeof (ggml_fp16_t ),
30193209 [GGML_TYPE_Q4_0 ] = sizeof (block_q4_0 ),
3210+ [GGML_TYPE_Q4_0C ] = 4 * sizeof (block_q4_0 ),
30203211 [GGML_TYPE_Q4_1 ] = sizeof (block_q4_1 ),
30213212 [GGML_TYPE_Q4_2 ] = sizeof (block_q4_2 ),
30223213 [GGML_TYPE_Q8_0 ] = sizeof (block_q8_0 ),
3214+ [GGML_TYPE_Q8_0C ] = sizeof (block_q8_0 ),
30233215 [GGML_TYPE_I8 ] = sizeof (int8_t ),
30243216 [GGML_TYPE_I16 ] = sizeof (int16_t ),
30253217 [GGML_TYPE_I32 ] = sizeof (int32_t ),
30263218};
3027- static_assert (GGML_TYPE_COUNT == 9 , "GGML_TYPE_SIZE is outdated" );
3219+ static_assert (GGML_TYPE_COUNT == 11 , "GGML_TYPE_SIZE is outdated" );
30283220
30293221
30303222static const char * GGML_TYPE_NAME [GGML_TYPE_COUNT ] = {
30313223 [GGML_TYPE_F32 ] = "f32" ,
30323224 [GGML_TYPE_F16 ] = "f16" ,
30333225 [GGML_TYPE_Q4_0 ] = "q4_0" ,
3226+ [GGML_TYPE_Q4_0C ] = "q4_0c" ,
30343227 [GGML_TYPE_Q4_1 ] = "q4_1" ,
30353228 [GGML_TYPE_Q4_2 ] = "q4_2" ,
30363229 [GGML_TYPE_Q8_0 ] = "q8_0" ,
3230+ [GGML_TYPE_Q8_0C ] = "q8_0c" ,
30373231 [GGML_TYPE_I8 ] = "i8" ,
30383232 [GGML_TYPE_I16 ] = "i16" ,
30393233 [GGML_TYPE_I32 ] = "i32" ,
30403234};
3041- static_assert (GGML_TYPE_COUNT == 9 , "GGML_TYPE_NAME is outdated" );
3235+ static_assert (GGML_TYPE_COUNT == 11 , "GGML_TYPE_NAME is outdated" );
30423236
30433237static bool GGML_IS_QUANTIZED [GGML_TYPE_COUNT ] = {
30443238 [GGML_TYPE_F32 ] = false,
30453239 [GGML_TYPE_F16 ] = false,
30463240 [GGML_TYPE_Q4_0 ] = true,
3241+ [GGML_TYPE_Q4_0C ] = true,
30473242 [GGML_TYPE_Q4_1 ] = true,
30483243 [GGML_TYPE_Q4_2 ] = true,
30493244 [GGML_TYPE_Q8_0 ] = true,
3245+ [GGML_TYPE_Q8_0C ] = true,
30503246 [GGML_TYPE_I8 ] = false,
30513247 [GGML_TYPE_I16 ] = false,
30523248 [GGML_TYPE_I32 ] = false,
30533249};
3054- static_assert (GGML_TYPE_COUNT == 9 , "GGML_IS_QUANTIZED is outdated" );
3250+ static_assert (GGML_TYPE_COUNT == 11 , "GGML_IS_QUANTIZED is outdated" );
30553251
30563252static const char * GGML_OP_LABEL [GGML_OP_COUNT ] = {
30573253 "NONE" ,
@@ -7873,9 +8069,11 @@ static void ggml_compute_forward_mul_mat(
78738069 struct ggml_tensor * dst ) {
78748070 switch (src0 -> type ) {
78758071 case GGML_TYPE_Q4_0 :
8072+ case GGML_TYPE_Q4_0C :
78768073 case GGML_TYPE_Q4_1 :
78778074 case GGML_TYPE_Q4_2 :
78788075 case GGML_TYPE_Q8_0 :
8076+ case GGML_TYPE_Q8_0C :
78798077 {
78808078 ggml_compute_forward_mul_mat_q_f32 (params , src0 , src1 , dst );
78818079 } break ;
@@ -8129,9 +8327,11 @@ static void ggml_compute_forward_get_rows(
81298327 struct ggml_tensor * dst ) {
81308328 switch (src0 -> type ) {
81318329 case GGML_TYPE_Q4_0 :
8330+ case GGML_TYPE_Q4_0C :
81328331 case GGML_TYPE_Q4_1 :
81338332 case GGML_TYPE_Q4_2 :
81348333 case GGML_TYPE_Q8_0 :
8334+ case GGML_TYPE_Q8_0C :
81358335 {
81368336 ggml_compute_forward_get_rows_q (params , src0 , src1 , dst );
81378337 } break ;
0 commit comments