perf intel-pt: Fix PEBS-via-PT data_src
[ Upstream commit e00eac6b5b6d956f38d8880c44bf7fd9954063c3 ]
The Fixes commit did not add support for decoding PEBS-via-PT data_src.
Fix by adding support.
PEBS-via-PT is a feature of some E-core processors, starting with
processors based on Tremont microarchitecture. Because the kernel only
supports Intel PT features that are on all processors, there is no support
for PEBS-via-PT on hybrids.
Currently that leaves processors based on Tremont, Gracemont and Crestmont,
however there are no events on Tremont that produce data_src information,
and for Gracemont and Crestmont there are only:
mem-loads event=0xd0,umask=0x5,ldlat=3
mem-stores event=0xd0,umask=0x6
Affected processors include Alder Lake N (Gracemont), Sierra Forest
(Crestmont) and Grand Ridge (Crestmont).
Example:
# perf record -d -e intel_pt/branch=0/ -e mem-loads/aux-output/pp uname
Before:
# perf.before script --itrace=o -Fdata_src
0 |OP No|LVL N/A|SNP N/A|TLB N/A|LCK No|BLK N/A
0 |OP No|LVL N/A|SNP N/A|TLB N/A|LCK No|BLK N/A
After:
# perf script --itrace=o -Fdata_src
10268100142 |OP LOAD|LVL L1 hit|SNP None|TLB L1 or L2 hit|LCK No|BLK N/A
10450100442 |OP LOAD|LVL L2 hit|SNP None|TLB L2 miss|LCK No|BLK N/A
Fixes: 975846eddf ("perf intel-pt: Add memory information to synthesized PEBS sample")
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20250512093932.79854-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
1737865a12
commit
53385a400a
@@ -127,6 +127,7 @@ struct intel_pt {
|
||||
|
||||
bool single_pebs;
|
||||
bool sample_pebs;
|
||||
int pebs_data_src_fmt;
|
||||
struct evsel *pebs_evsel;
|
||||
|
||||
u64 evt_sample_type;
|
||||
@@ -175,6 +176,7 @@ enum switch_state {
|
||||
/*
 * Per-counter PEBS state. Entries live in the ptq->pebs[] array, which is
 * indexed by hw_id and filled in by intel_pt_process_aux_output_hw_id().
 */
struct intel_pt_pebs_event {
|
||||
struct evsel *evsel; /* event the PEBS samples are synthesized for */
|
||||
u64 id; /* sample id recorded together with evsel */
|
||||
int data_src_fmt; /* value returned by intel_pt_data_src_fmt() for evsel */
|
||||
};
|
||||
|
||||
struct intel_pt_queue {
|
||||
@@ -2232,7 +2234,146 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
|
||||
/*
 * Shorthand used to build the data source tables below. P() is an alias for
 * PERF_MEM_S(); the other macros are combinations of P() values.
 */
#define P(a, b) PERF_MEM_S(a, b)
|
||||
/* Load operation that hit */
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
|
||||
/* LVLNUM field value */
#define LEVEL(x) P(LVLNUM, x)
|
||||
/* REMOTE field set */
#define REM P(REMOTE, REMOTE)
|
||||
/* Both the SNOOP NONE and SNOOP MISS bits */
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
|
||||
|
||||
/* Number of entries in each data source table */
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
|
||||
/* Mask that confines a table index to PERF_PEBS_DATA_SOURCE_GRT_MAX entries */
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
|
||||
|
||||
/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */
|
||||
static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
|
||||
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
|
||||
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
|
||||
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
|
||||
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */
|
||||
OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
|
||||
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */
|
||||
OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */
|
||||
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */
|
||||
OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */
|
||||
OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
|
||||
OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
|
||||
};
|
||||
|
||||
/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */
|
||||
static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
|
||||
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
|
||||
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
|
||||
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
|
||||
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */
|
||||
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */
|
||||
OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
|
||||
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */
|
||||
OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */
|
||||
OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */
|
||||
OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */
|
||||
OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
|
||||
OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
|
||||
};
|
||||
|
||||
/* Based on kernel pebs_set_tlb_lock() */
|
||||
static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
|
||||
{
|
||||
/*
|
||||
* TLB access
|
||||
* 0 = did not miss 2nd level TLB
|
||||
* 1 = missed 2nd level TLB
|
||||
*/
|
||||
if (tlb)
|
||||
*val |= P(TLB, MISS) | P(TLB, L2);
|
||||
else
|
||||
*val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
|
||||
|
||||
/* locked prefix */
|
||||
if (lock)
|
||||
*val |= P(LOCK, LOCKED);
|
||||
}
|
||||
|
||||
/* Based on kernel __grt_latency_data() */
|
||||
static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk,
|
||||
const u64 *pebs_data_source)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
|
||||
val = pebs_data_source[dse];
|
||||
|
||||
pebs_set_tlb_lock(&val, tlb, lock);
|
||||
|
||||
if (blk)
|
||||
val |= P(BLK, DATA);
|
||||
else
|
||||
val |= P(BLK, NA);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
 * Default value for data source: each of the OP, LVL, SNOOP, LOCK, TLB and
 * LVLNUM fields is set to its NA value. Used when no data source can be
 * decoded for a sample.
 */
|
||||
#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
|
||||
PERF_MEM_S(LVL, NA) |\
|
||||
PERF_MEM_S(SNOOP, NA) |\
|
||||
PERF_MEM_S(LOCK, NA) |\
|
||||
PERF_MEM_S(TLB, NA) |\
|
||||
PERF_MEM_S(LVLNUM, NA))
|
||||
|
||||
/*
 * Encoding of the PEBS memory auxiliary info, as determined by
 * intel_pt_data_src_fmt(). Callers test the value for zero and for being
 * negative, so NA must stay 0 and ERR must stay negative.
 */
enum DATA_SRC_FORMAT {
|
||||
DATA_SRC_FORMAT_ERR = -1, /* format could not be determined (no cpuid) */
|
||||
DATA_SRC_FORMAT_NA = 0, /* no data_src decoding for this CPU / event */
|
||||
DATA_SRC_FORMAT_GRT = 1, /* decode using pebs_data_source_grt */
|
||||
DATA_SRC_FORMAT_CMT = 2, /* decode using pebs_data_source_cmt */
|
||||
};
|
||||
|
||||
/* Based on kernel grt_latency_data() and cmt_latency_data */
|
||||
static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt)
|
||||
{
|
||||
switch (data_src_fmt) {
|
||||
case DATA_SRC_FORMAT_GRT: {
|
||||
union {
|
||||
u64 val;
|
||||
struct {
|
||||
unsigned int dse:4;
|
||||
unsigned int locked:1;
|
||||
unsigned int stlb_miss:1;
|
||||
unsigned int fwd_blk:1;
|
||||
unsigned int reserved:25;
|
||||
};
|
||||
} x = {.val = mem_aux_info};
|
||||
return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
|
||||
pebs_data_source_grt);
|
||||
}
|
||||
case DATA_SRC_FORMAT_CMT: {
|
||||
union {
|
||||
u64 val;
|
||||
struct {
|
||||
unsigned int dse:5;
|
||||
unsigned int locked:1;
|
||||
unsigned int stlb_miss:1;
|
||||
unsigned int fwd_blk:1;
|
||||
unsigned int reserved:24;
|
||||
};
|
||||
} x = {.val = mem_aux_info};
|
||||
return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
|
||||
pebs_data_source_cmt);
|
||||
}
|
||||
default:
|
||||
return PERF_MEM_NA;
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel,
|
||||
u64 id, int data_src_fmt)
|
||||
{
|
||||
const struct intel_pt_blk_items *items = &ptq->state->items;
|
||||
struct perf_sample sample = { .ip = 0, };
|
||||
@@ -2350,6 +2491,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
|
||||
}
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC) {
|
||||
if (items->has_mem_aux_info && data_src_fmt) {
|
||||
if (data_src_fmt < 0) {
|
||||
pr_err("Intel PT missing data_src info\n");
|
||||
return -1;
|
||||
}
|
||||
sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt);
|
||||
} else {
|
||||
sample.data_src = PERF_MEM_NA;
|
||||
}
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
|
||||
u64 ax = items->has_rax ? items->rax : 0;
|
||||
/* Refer kernel's intel_hsw_transaction() */
|
||||
@@ -2368,9 +2521,10 @@ static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
|
||||
{
|
||||
struct intel_pt *pt = ptq->pt;
|
||||
struct evsel *evsel = pt->pebs_evsel;
|
||||
int data_src_fmt = pt->pebs_data_src_fmt;
|
||||
u64 id = evsel->core.id[0];
|
||||
|
||||
return intel_pt_do_synth_pebs_sample(ptq, evsel, id);
|
||||
return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt);
|
||||
}
|
||||
|
||||
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
|
||||
@@ -2395,7 +2549,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
|
||||
hw_id);
|
||||
return intel_pt_synth_single_pebs_sample(ptq);
|
||||
}
|
||||
err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id);
|
||||
err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
@@ -3355,6 +3509,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
|
||||
event->itrace_start.tid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Events with data_src are identified by L1_Hit_Indication
|
||||
* refer https://github.com/intel/perfmon
|
||||
*/
|
||||
static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel)
|
||||
{
|
||||
struct perf_env *env = pt->machine->env;
|
||||
int fmt = DATA_SRC_FORMAT_NA;
|
||||
|
||||
if (!env->cpuid)
|
||||
return DATA_SRC_FORMAT_ERR;
|
||||
|
||||
/*
|
||||
* PEBS-via-PT is only supported on E-core non-hybrid. Of those only
|
||||
* Gracemont and Crestmont have data_src. Check for:
|
||||
* Alderlake N (Gracemont)
|
||||
* Sierra Forest (Crestmont)
|
||||
* Grand Ridge (Crestmont)
|
||||
*/
|
||||
|
||||
if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19))
|
||||
fmt = DATA_SRC_FORMAT_GRT;
|
||||
|
||||
if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) ||
|
||||
!strncmp(env->cpuid, "GenuineIntel,6,182,", 19))
|
||||
fmt = DATA_SRC_FORMAT_CMT;
|
||||
|
||||
if (fmt == DATA_SRC_FORMAT_NA)
|
||||
return fmt;
|
||||
|
||||
/*
|
||||
* Only data_src events are:
|
||||
* mem-loads event=0xd0,umask=0x5
|
||||
* mem-stores event=0xd0,umask=0x6
|
||||
*/
|
||||
if (evsel->core.attr.type == PERF_TYPE_RAW &&
|
||||
((evsel->core.attr.config & 0xffff) == 0x5d0 ||
|
||||
(evsel->core.attr.config & 0xffff) == 0x6d0))
|
||||
return fmt;
|
||||
|
||||
return DATA_SRC_FORMAT_NA;
|
||||
}
|
||||
|
||||
static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
|
||||
union perf_event *event,
|
||||
struct perf_sample *sample)
|
||||
@@ -3375,6 +3572,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
|
||||
|
||||
ptq->pebs[hw_id].evsel = evsel;
|
||||
ptq->pebs[hw_id].id = sample->id;
|
||||
ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -3946,6 +4144,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt)
|
||||
}
|
||||
pt->single_pebs = true;
|
||||
pt->sample_pebs = true;
|
||||
pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
|
||||
pt->pebs_evsel = evsel;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user