@@ -1154,13 +1154,17 @@ static void quantize_row_q4_0c_reference(const float * restrict x, uint8_t * res
11541154 float id [2 ];
11551155 for (int j = 0 ; j < 2 ; j ++ ) {
11561156 float amax = 0.0f ; // absolute max
1157+ float max = 0.0f ;
11571158
11581159 for (int l = 0 ; l < QK4_0 ; l ++ ) {
11591160 const float v = xb [j ][l ];
1160- amax = MAX (amax , fabsf (v ));
1161+ if (amax < fabsf (v )) {
1162+ amax = fabsf (v );
1163+ max = v ;
1164+ }
11611165 }
11621166
1163- d [j ] = amax / (( 1 << 3 ) - 1 ) ;
1167+ d [j ] = max / -8 ;
11641168 id [j ] = d [j ] ? 1.0f /d [j ] : 0.0f ;
11651169 }
11661170
@@ -1169,10 +1173,10 @@ static void quantize_row_q4_0c_reference(const float * restrict x, uint8_t * res
11691173
11701174 for (int l = 0 ; l < QK4_0 ; l ++ ) {
11711175 const float v0 = xb [0 ][l ]* id [0 ];
1172- const uint8_t vi0 = ( int8_t )roundf (v0 ) + 8 ;
1176+ const uint8_t vi0 = MIN ( 15 , ( int8_t )roundf (v0 ) + 8 ) ;
11731177
11741178 const float v1 = xb [1 ][l ]* id [1 ];
1175- const uint8_t vi1 = ( int8_t )roundf (v1 ) + 8 ;
1179+ const uint8_t vi1 = MIN ( 15 , ( int8_t )roundf (v1 ) + 8 ) ;
11761180
11771181 assert (vi0 < 16 );
11781182 assert (vi1 < 16 );
@@ -3126,16 +3130,19 @@ static void ggml_vec_dot_q4_0c_q8_0c(const int n, float * restrict s, const void
31263130 float sumf = 0.0 ;
31273131
31283132#if defined(__ARM_NEON )
3129- const int ahead = 80 ;
31303133 float32x4_t sumv0 = vdupq_n_f32 (0.0f );
31313134 float32x4_t sumv1 = vdupq_n_f32 (0.0f );
31323135
31333136 for (int i = 0 ; i < nb /2 ; i ++ ) {
3137+ // Disable prefetching on all Apple targets (observed on M1) for now; the guard is __APPLE__, not M1-specific.
3138+ #ifndef __APPLE__
3139+ const int ahead = 80 ;
31343140 __builtin_prefetch (& xqs [i * QK4_0 + 64 * ahead ]);
31353141 __builtin_prefetch (& yqs [2 * i * QK8_0C + 64 * ahead ]);
31363142 __builtin_prefetch (& yqs [2 * i * QK8_0C + 64 * ahead + 64 ]);
31373143 __builtin_prefetch (& xds [2 * i + 64 /4 * ahead ]);
31383144 __builtin_prefetch (& yds [2 * i + 64 /4 * ahead ]);
3145+ #endif
31393146
31403147 const int dst0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
31413148 const int dst1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
@@ -9738,11 +9745,13 @@ static void ggml_compute_forward_alibi(
97389745 ggml_compute_forward_alibi_f32 (params , src0 , src1 , dst );
97399746 } break ;
97409747 case GGML_TYPE_Q4_0 :
9748+ case GGML_TYPE_Q4_0C :
97419749 case GGML_TYPE_Q4_1 :
97429750 case GGML_TYPE_Q4_2 :
97439751 case GGML_TYPE_Q5_0 :
97449752 case GGML_TYPE_Q5_1 :
97459753 case GGML_TYPE_Q8_0 :
9754+ case GGML_TYPE_Q8_0C :
97469755 case GGML_TYPE_Q8_1 :
97479756 case GGML_TYPE_I8 :
97489757 case GGML_TYPE_I16 :
0 commit comments