// Number of untimed calls benchmark_function makes to the benchmarked callable before measuring.
1818#define WARMUP 5
// Number of timed calls benchmark_function makes; time and cycle deltas are summed and min-tracked across them.
1919#define ITERATIONS 10
2020
// Buffer-size constants, each written as 32 * N. In this hunk the "-" lines carry the old
// multipliers (100 / 2000 / 20000 / 2000000) and the "+" lines the new ones
// (128 / 2048 / 20480 / 2048000).
// NOTE(review): presumably these are element counts chosen to fit L1 / L2 / L3 cache and main
// memory respectively; their use sites are not visible in this excerpt -- confirm.
// NOTE(review): the macro bodies are unparenthesized, so they expand textually: an expression
// such as `x / L1_SIZE` becomes `x / 32 * 128`. Check use sites before combining with other operators.
21- #define L1_SIZE 32 *100
22- #define L2_SIZE 32 *2000
23- #define L3_SIZE 32 *20000
24- #define MEM_SIZE 32 *2000000
21+ #define L1_SIZE 32 *128
22+ #define L2_SIZE 32 *2048
23+ #define L3_SIZE 32 *20480
24+ #define MEM_SIZE 32 *2048000
2525
2626struct quantize_perf_params {
2727 std::vector<std::string> include_types;
@@ -36,7 +36,7 @@ struct quantize_perf_params {
3636
3737#if defined(__x86_64__) || defined(__i386__)
3838
39- #include < immintrin .h>
39+ #include < x86intrin .h>
4040inline int64_t cpu_cycles () {
4141// Rough way to detect new-ish CPUs
4242#ifdef __POPCNT__
@@ -71,29 +71,25 @@ void * align_with_offset(void * ptr, int offset) {
7171}
7272
7373void benchmark_function (size_t size, size_t q_size, std::function<size_t (void )> function) {
74-
75- size_t bytes_out = 0 ;
76-
7774 int64_t min_time_us = INT64_MAX;
7875 int64_t total_time_us = 0 ;
7976 int64_t min_time_cycles = INT64_MAX;
8077 int64_t total_time_cycles = 0 ;
8178
8279 for (int i = 0 ; i < WARMUP; i++) {
83- bytes_out |= function ();
80+ function ();
8481 }
8582
8683
8784 for (int i = 0 ; i < ITERATIONS; i++) {
8885 const int64_t start_time = ggml_time_us ();
8986 const int64_t start_cycles = cpu_cycles ();
9087
91- bytes_out |= function ();
88+ function ();
9289
9390 const int64_t end_cycles = cpu_cycles ();
9491 const int64_t end_time = ggml_time_us ();
9592
96- // printf(" aostne %d\n", end_cycles - start_cycles);
9793 total_time_cycles += end_cycles - start_cycles;
9894 min_time_cycles = std::min (min_time_cycles, end_cycles - start_cycles);
9995 total_time_us += end_time - start_time;
0 commit comments