3434#include "arm-spe-decoder/arm-spe-decoder.h"
3535#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
3636
37+ #include "../../arch/arm64/include/asm/cputype.h"
3738#define MAX_TIMESTAMP (~0ULL)
3839
3940struct arm_spe {
@@ -45,6 +46,7 @@ struct arm_spe {
4546 struct perf_session * session ;
4647 struct machine * machine ;
4748 u32 pmu_type ;
49+ u64 midr ;
4850
4951 struct perf_tsc_conversion tc ;
5052
@@ -387,35 +389,128 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
387389 return arm_spe_deliver_synth_event (spe , speq , event , & sample );
388390}
389391
390- static u64 arm_spe__synth_data_source (const struct arm_spe_record * record )
392+ static const struct midr_range neoverse_spe [] = {
393+ MIDR_ALL_VERSIONS (MIDR_NEOVERSE_N1 ),
394+ MIDR_ALL_VERSIONS (MIDR_NEOVERSE_N2 ),
395+ MIDR_ALL_VERSIONS (MIDR_NEOVERSE_V1 ),
396+ {},
397+ };
398+
399+ static void arm_spe__synth_data_source_neoverse (const struct arm_spe_record * record ,
400+ union perf_mem_data_src * data_src )
391401{
392- union perf_mem_data_src data_src = { 0 };
402+ /*
403+ * Even though four levels of cache hierarchy are possible, no known
404+ * production Neoverse systems currently include more than three levels
405+ * so for the time being we assume three exist. If a production system
406+ * is built with four the this function would have to be changed to
407+ * detect the number of levels for reporting.
408+ */
393409
394- if (record -> op == ARM_SPE_LD )
395- data_src .mem_op = PERF_MEM_OP_LOAD ;
396- else if (record -> op == ARM_SPE_ST )
397- data_src .mem_op = PERF_MEM_OP_STORE ;
398- else
399- return 0 ;
410+ /*
411+ * We have no data on the hit level or data source for stores in the
412+ * Neoverse SPE records.
413+ */
414+ if (record -> op & ARM_SPE_ST ) {
415+ data_src -> mem_lvl = PERF_MEM_LVL_NA ;
416+ data_src -> mem_lvl_num = PERF_MEM_LVLNUM_NA ;
417+ data_src -> mem_snoop = PERF_MEM_SNOOP_NA ;
418+ return ;
419+ }
420+
421+ switch (record -> source ) {
422+ case ARM_SPE_NV_L1D :
423+ data_src -> mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT ;
424+ data_src -> mem_lvl_num = PERF_MEM_LVLNUM_L1 ;
425+ data_src -> mem_snoop = PERF_MEM_SNOOP_NONE ;
426+ break ;
427+ case ARM_SPE_NV_L2 :
428+ data_src -> mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT ;
429+ data_src -> mem_lvl_num = PERF_MEM_LVLNUM_L2 ;
430+ data_src -> mem_snoop = PERF_MEM_SNOOP_NONE ;
431+ break ;
432+ case ARM_SPE_NV_PEER_CORE :
433+ data_src -> mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT ;
434+ data_src -> mem_lvl_num = PERF_MEM_LVLNUM_L2 ;
435+ data_src -> mem_snoopx = PERF_MEM_SNOOPX_PEER ;
436+ break ;
437+ /*
438+ * We don't know if this is L1, L2 but we do know it was a cache-2-cache
439+ * transfer, so set SNOOPX_PEER
440+ */
441+ case ARM_SPE_NV_LOCAL_CLUSTER :
442+ case ARM_SPE_NV_PEER_CLUSTER :
443+ data_src -> mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT ;
444+ data_src -> mem_lvl_num = PERF_MEM_LVLNUM_L3 ;
445+ data_src -> mem_snoopx = PERF_MEM_SNOOPX_PEER ;
446+ break ;
447+ /*
448+ * System cache is assumed to be L3
449+ */
450+ case ARM_SPE_NV_SYS_CACHE :
451+ data_src -> mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT ;
452+ data_src -> mem_lvl_num = PERF_MEM_LVLNUM_L3 ;
453+ data_src -> mem_snoop = PERF_MEM_SNOOP_HIT ;
454+ break ;
455+ /*
456+ * We don't know what level it hit in, except it came from the other
457+ * socket
458+ */
459+ case ARM_SPE_NV_REMOTE :
460+ data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE1 ;
461+ data_src -> mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE ;
462+ data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
463+ data_src -> mem_snoopx = PERF_MEM_SNOOPX_PEER ;
464+ break ;
465+ case ARM_SPE_NV_DRAM :
466+ data_src -> mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT ;
467+ data_src -> mem_lvl_num = PERF_MEM_LVLNUM_RAM ;
468+ data_src -> mem_snoop = PERF_MEM_SNOOP_NONE ;
469+ break ;
470+ default :
471+ break ;
472+ }
473+ }
400474
475+ static void arm_spe__synth_data_source_generic (const struct arm_spe_record * record ,
476+ union perf_mem_data_src * data_src )
477+ {
401478 if (record -> type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS )) {
402- data_src . mem_lvl = PERF_MEM_LVL_L3 ;
479+ data_src -> mem_lvl = PERF_MEM_LVL_L3 ;
403480
404481 if (record -> type & ARM_SPE_LLC_MISS )
405- data_src . mem_lvl |= PERF_MEM_LVL_MISS ;
482+ data_src -> mem_lvl |= PERF_MEM_LVL_MISS ;
406483 else
407- data_src . mem_lvl |= PERF_MEM_LVL_HIT ;
484+ data_src -> mem_lvl |= PERF_MEM_LVL_HIT ;
408485 } else if (record -> type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS )) {
409- data_src . mem_lvl = PERF_MEM_LVL_L1 ;
486+ data_src -> mem_lvl = PERF_MEM_LVL_L1 ;
410487
411488 if (record -> type & ARM_SPE_L1D_MISS )
412- data_src . mem_lvl |= PERF_MEM_LVL_MISS ;
489+ data_src -> mem_lvl |= PERF_MEM_LVL_MISS ;
413490 else
414- data_src . mem_lvl |= PERF_MEM_LVL_HIT ;
491+ data_src -> mem_lvl |= PERF_MEM_LVL_HIT ;
415492 }
416493
417494 if (record -> type & ARM_SPE_REMOTE_ACCESS )
418- data_src .mem_lvl |= PERF_MEM_LVL_REM_CCE1 ;
495+ data_src -> mem_lvl |= PERF_MEM_LVL_REM_CCE1 ;
496+ }
497+
498+ static u64 arm_spe__synth_data_source (const struct arm_spe_record * record , u64 midr )
499+ {
500+ union perf_mem_data_src data_src = { 0 };
501+ bool is_neoverse = is_midr_in_range (midr , neoverse_spe );
502+
503+ if (record -> op == ARM_SPE_LD )
504+ data_src .mem_op = PERF_MEM_OP_LOAD ;
505+ else if (record -> op == ARM_SPE_ST )
506+ data_src .mem_op = PERF_MEM_OP_STORE ;
507+ else
508+ return 0 ;
509+
510+ if (is_neoverse )
511+ arm_spe__synth_data_source_neoverse (record , & data_src );
512+ else
513+ arm_spe__synth_data_source_generic (record , & data_src );
419514
420515 if (record -> type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS )) {
421516 data_src .mem_dtlb = PERF_MEM_TLB_WK ;
@@ -436,7 +531,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
436531 u64 data_src ;
437532 int err ;
438533
439- data_src = arm_spe__synth_data_source (record );
534+ data_src = arm_spe__synth_data_source (record , spe -> midr );
440535
441536 if (spe -> sample_flc ) {
442537 if (record -> type & ARM_SPE_L1D_MISS ) {
@@ -1178,6 +1273,8 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
11781273 struct perf_record_auxtrace_info * auxtrace_info = & event -> auxtrace_info ;
11791274 size_t min_sz = sizeof (u64 ) * ARM_SPE_AUXTRACE_PRIV_MAX ;
11801275 struct perf_record_time_conv * tc = & session -> time_conv ;
1276+ const char * cpuid = perf_env__cpuid (session -> evlist -> env );
1277+ u64 midr = strtol (cpuid , NULL , 16 );
11811278 struct arm_spe * spe ;
11821279 int err ;
11831280
@@ -1197,6 +1294,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
11971294 spe -> machine = & session -> machines .host ; /* No kvm support */
11981295 spe -> auxtrace_type = auxtrace_info -> type ;
11991296 spe -> pmu_type = auxtrace_info -> priv [ARM_SPE_PMU_TYPE ];
1297+ spe -> midr = midr ;
12001298
12011299 spe -> timeless_decoding = arm_spe__is_timeless_decoding (spe );
12021300
0 commit comments