Skip to content

Commit a89dbc9

Browse files
Leo Yanacmel
authored and committed
perf arm-spe: Set sample's data source field
The sample structure contains the field 'data_src', which describes the attributes of a data operation, e.g. whether the operation is a load or a store, the cache level, whether it is a snoop or a remote access, etc. In the end, 'data_src' is parsed by the perf mem/c2c tools to display human-readable strings. This patch fills the 'data_src' field in the synthesized samples based on the different sample types. Currently the perf tool can display statistics for L1/L2/L3 caches, but it doesn't support the 'last level cache'. To fit the current implementation, the 'data_src' field uses the L3 cache for the last level cache. Before this commit, perf mem report looks like this: # Samples: 75K of event 'l1d-miss' # Total weight : 75951 # Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked # # Overhead Samples Local Weight Memory access Symbol Shared Object Data Symbol Data Object Snoop TLB access # ........ ....... ............ ............. ...................... ............. ...................... ........... ..... .......... # 81.56% 61945 0 N/A [.] 0x00000000000009d8 serial_c [.] 0000000000000000 [unknown] N/A N/A 18.44% 14003 0 N/A [.] 0x0000000000000828 serial_c [.] 0000000000000000 [unknown] N/A N/A Now on a system with Arm SPE, addresses and access types are displayed: # Samples: 75K of event 'l1d-miss' # Total weight : 75951 # Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked # # Overhead Samples Local Weight Memory access Symbol Shared Object Data Symbol Data Object Snoop TLB access # ........ ....... ............ ............. ...................... ............. ...................... ........... ..... .......... # 0.43% 324 0 L1 miss [.] 0x00000000000009d8 serial_c [.] 0x0000ffff80794e00 anon N/A Walker hit 0.42% 322 0 L1 miss [.] 0x00000000000009d8 serial_c [.] 
0x0000ffff80794580 anon N/A Walker hit Signed-off-by: Leo Yan <[email protected]> Reviewed-by: James Clark <[email protected]> Tested-by: James Clark <[email protected]> Cc: Adrian Hunter <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Al Grant <[email protected]> Cc: Andre Przywara <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: John Garry <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Mathieu Poirier <[email protected]> Cc: Namhyung Kim <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Wei Li <[email protected]> Cc: Will Deacon <[email protected]> Signed-off-by: James Clark <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent e55ed34 commit a89dbc9

File tree

1 file changed

+60
-9
lines changed

1 file changed

+60
-9
lines changed

tools/perf/util/arm-spe.c

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ arm_spe_deliver_synth_event(struct arm_spe *spe,
261261
}
262262

263263
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
264-
u64 spe_events_id)
264+
u64 spe_events_id, u64 data_src)
265265
{
266266
struct arm_spe *spe = speq->spe;
267267
struct arm_spe_record *record = &speq->decoder->record;
@@ -274,6 +274,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
274274
sample.stream_id = spe_events_id;
275275
sample.addr = record->virt_addr;
276276
sample.phys_addr = record->phys_addr;
277+
sample.data_src = data_src;
277278

278279
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
279280
}
@@ -307,49 +308,98 @@ static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
307308
return false;
308309
}
309310

311+
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
312+
{
313+
union perf_mem_data_src data_src = { 0 };
314+
315+
if (record->op == ARM_SPE_LD)
316+
data_src.mem_op = PERF_MEM_OP_LOAD;
317+
else
318+
data_src.mem_op = PERF_MEM_OP_STORE;
319+
320+
if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
321+
data_src.mem_lvl = PERF_MEM_LVL_L3;
322+
323+
if (record->type & ARM_SPE_LLC_MISS)
324+
data_src.mem_lvl |= PERF_MEM_LVL_MISS;
325+
else
326+
data_src.mem_lvl |= PERF_MEM_LVL_HIT;
327+
} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
328+
data_src.mem_lvl = PERF_MEM_LVL_L1;
329+
330+
if (record->type & ARM_SPE_L1D_MISS)
331+
data_src.mem_lvl |= PERF_MEM_LVL_MISS;
332+
else
333+
data_src.mem_lvl |= PERF_MEM_LVL_HIT;
334+
}
335+
336+
if (record->type & ARM_SPE_REMOTE_ACCESS)
337+
data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
338+
339+
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
340+
data_src.mem_dtlb = PERF_MEM_TLB_WK;
341+
342+
if (record->type & ARM_SPE_TLB_MISS)
343+
data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
344+
else
345+
data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
346+
}
347+
348+
return data_src.val;
349+
}
350+
310351
static int arm_spe_sample(struct arm_spe_queue *speq)
311352
{
312353
const struct arm_spe_record *record = &speq->decoder->record;
313354
struct arm_spe *spe = speq->spe;
355+
u64 data_src;
314356
int err;
315357

358+
data_src = arm_spe__synth_data_source(record);
359+
316360
if (spe->sample_flc) {
317361
if (record->type & ARM_SPE_L1D_MISS) {
318-
err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id);
362+
err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
363+
data_src);
319364
if (err)
320365
return err;
321366
}
322367

323368
if (record->type & ARM_SPE_L1D_ACCESS) {
324-
err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id);
369+
err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
370+
data_src);
325371
if (err)
326372
return err;
327373
}
328374
}
329375

330376
if (spe->sample_llc) {
331377
if (record->type & ARM_SPE_LLC_MISS) {
332-
err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id);
378+
err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
379+
data_src);
333380
if (err)
334381
return err;
335382
}
336383

337384
if (record->type & ARM_SPE_LLC_ACCESS) {
338-
err = arm_spe__synth_mem_sample(speq, spe->llc_access_id);
385+
err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
386+
data_src);
339387
if (err)
340388
return err;
341389
}
342390
}
343391

344392
if (spe->sample_tlb) {
345393
if (record->type & ARM_SPE_TLB_MISS) {
346-
err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id);
394+
err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
395+
data_src);
347396
if (err)
348397
return err;
349398
}
350399

351400
if (record->type & ARM_SPE_TLB_ACCESS) {
352-
err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id);
401+
err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
402+
data_src);
353403
if (err)
354404
return err;
355405
}
@@ -363,13 +413,14 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
363413

364414
if (spe->sample_remote_access &&
365415
(record->type & ARM_SPE_REMOTE_ACCESS)) {
366-
err = arm_spe__synth_mem_sample(speq, spe->remote_access_id);
416+
err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
417+
data_src);
367418
if (err)
368419
return err;
369420
}
370421

371422
if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
372-
err = arm_spe__synth_mem_sample(speq, spe->memory_id);
423+
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
373424
if (err)
374425
return err;
375426
}

0 commit comments

Comments
 (0)