mirror of https://github.com/Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
670 lines
16 KiB
670 lines
16 KiB
perf.data format |
|
|
|
Up to date as of v4.7 |
|
|
|
This document describes the on-disk perf.data format, generated by perf record |
|
or perf inject and consumed by the other perf tools. |
|
|
|
On a high level perf.data contains the events generated by the PMUs, plus metadata. |
|
|
|
All fields are in native-endian of the machine that generated the perf.data. |
|
|
|
When perf is writing to a pipe it uses a special version of the file |
|
format that does not rely on seeking to adjust data offsets. This |
|
format is described in "Pipe-mode data" section. The pipe data version can be |
|
augmented with additional events using perf inject. |
|
|
|
The file starts with a perf_header: |
|
|
|
struct perf_header { |
|
char magic[8]; /* PERFILE2 */ |
|
uint64_t size; /* size of the header */ |
|
uint64_t attr_size; /* size of an attribute in attrs */ |
|
struct perf_file_section attrs; |
|
struct perf_file_section data; |
|
struct perf_file_section event_types; |
|
uint64_t flags; |
|
uint64_t flags1[3]; |
|
}; |
|
|
|
The magic number identifies the perf file and the version. Current perf versions |
|
use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1 |
|
is not described here. The magic number also identifies the endian. When the |
|
magic value is 64bit byte swapped compared to the expected value, the file is in non-native |
|
endian. |
|
|
|
A perf_file_section contains a pointer to another section of the perf file. |
|
The header contains three such pointers: for attributes, data and event types. |
|
|
|
struct perf_file_section { |
|
uint64_t offset; /* offset from start of file */ |
|
uint64_t size; /* size of the section */ |
|
}; |
|
|
|
Flags section: |
|
|
|
For each of the optional features a perf_file_section is placed after the data |
|
section if the feature bit is set in the perf_header flags bitset. The |
|
respective perf_file_section points to the data of the additional header and |
|
defines its size. |
|
|
|
Some headers consist of strings, which are defined like this: |
|
|
|
struct perf_header_string { |
|
uint32_t len; |
|
char string[len]; /* zero terminated */ |
|
}; |
|
|
|
Some headers consist of a sequence of strings, which start with a count of the strings: |
|
|
|
struct perf_header_string_list { |
|
uint32_t nr; |
|
struct perf_header_string strings[nr]; /* variable length records */ |
|
}; |
|
|
|
The bits are the flags bits in a 256 bit bitmap starting with |
|
flags. These define the valid bits: |
|
|
|
HEADER_RESERVED = 0, /* always cleared */ |
|
HEADER_FIRST_FEATURE = 1, |
|
HEADER_TRACING_DATA = 1, |
|
|
|
Describe me. |
|
|
|
HEADER_BUILD_ID = 2, |
|
|
|
The header consists of a sequence of build_id_event records. The size of each record |
|
is defined by header.size (see perf_event.h). Each event defines an ELF build id |
|
for an executable file name for a pid. An ELF build id is a unique identifier |
|
assigned by the linker to an executable. |
|
|
|
struct build_id_event { |
|
struct perf_event_header header; |
|
pid_t pid; |
|
uint8_t build_id[24]; |
|
char filename[header.size - offsetof(struct build_id_event, filename)]; |
|
}; |
|
|
|
HEADER_HOSTNAME = 3, |
|
|
|
A perf_header_string with the hostname where the data was collected |
|
(uname -n) |
|
|
|
HEADER_OSRELEASE = 4, |
|
|
|
A perf_header_string with the os release where the data was collected |
|
(uname -r) |
|
|
|
HEADER_VERSION = 5, |
|
|
|
A perf_header_string with the perf user tool version where the |
|
data was collected. This is the same as the version of the source tree |
|
the perf tool was built from. |
|
|
|
HEADER_ARCH = 6, |
|
|
|
A perf_header_string with the CPU architecture (uname -m) |
|
|
|
HEADER_NRCPUS = 7, |
|
|
|
A structure defining the number of CPUs. |
|
|
|
struct nr_cpus { |
|
uint32_t nr_cpus_available; /* CPUs not yet onlined */ |
|
uint32_t nr_cpus_online; |
|
}; |
|
|
|
HEADER_CPUDESC = 8, |
|
|
|
A perf_header_string with description of the CPU. On x86 this is the model name |
|
in /proc/cpuinfo |
|
|
|
HEADER_CPUID = 9, |
|
|
|
A perf_header_string with the exact CPU type. On x86 this is |
|
vendor,family,model,stepping. For example: GenuineIntel,6,69,1 |
|
|
|
HEADER_TOTAL_MEM = 10, |
|
|
|
A uint64_t with the total memory in kilobytes. |
|
|
|
HEADER_CMDLINE = 11, |
|
|
|
A perf_header_string_list with the perf arg-vector used to collect the data. |
|
|
|
HEADER_EVENT_DESC = 12, |
|
|
|
Another description of the perf_event_attrs, more detailed than header.attrs |
|
including IDs and names. See perf_event.h or the man page for a description |
|
of a struct perf_event_attr. |
|
|
|
struct { |
|
uint32_t nr; /* number of events */ |
|
uint32_t attr_size; /* size of each perf_event_attr */ |
|
struct { |
|
struct perf_event_attr attr; /* size of attr_size */ |
|
uint32_t nr_ids; |
|
struct perf_header_string event_string; |
|
uint64_t ids[nr_ids]; |
|
} events[nr]; /* Variable length records */ |
|
}; |
|
|
|
HEADER_CPU_TOPOLOGY = 13, |
|
|
|
struct { |
|
/* |
|
* First revision of HEADER_CPU_TOPOLOGY |
|
* |
|
* See 'struct perf_header_string_list' definition earlier |
|
* in this file. |
|
*/ |
|
|
|
struct perf_header_string_list cores; /* Variable length */ |
|
struct perf_header_string_list threads; /* Variable length */ |
|
|
|
/* |
|
* Second revision of HEADER_CPU_TOPOLOGY, older tools |
|
* will not consider what comes next |
|
*/ |
|
|
|
struct { |
|
uint32_t core_id; |
|
uint32_t socket_id; |
|
} cpus[nr]; /* Variable length records */ |
|
/* 'nr' comes from previously processed HEADER_NRCPUS's nr_cpus_available */ |
|
|
|
/* |
|
* Third revision of HEADER_CPU_TOPOLOGY, older tools |
|
* will not consider what comes next |
|
*/ |
|
|
|
struct perf_header_string_list dies; /* Variable length */ |
|
uint32_t die_id[nr_cpus_available]; /* from previously processed HEADER_NRCPUS, VLA */ |
|
}; |
|
|
|
Example: |
|
sibling sockets : 0-8 |
|
sibling dies : 0-3 |
|
sibling dies : 4-7 |
|
sibling threads : 0-1 |
|
sibling threads : 2-3 |
|
sibling threads : 4-5 |
|
sibling threads : 6-7 |
|
|
|
HEADER_NUMA_TOPOLOGY = 14, |
|
|
|
A list of NUMA node descriptions |
|
|
|
struct { |
|
uint32_t nr; |
|
struct { |
|
uint32_t nodenr; |
|
uint64_t mem_total; |
|
uint64_t mem_free; |
|
struct perf_header_string cpus; |
|
} nodes[nr]; /* Variable length records */ |
|
}; |
|
|
|
HEADER_BRANCH_STACK = 15, |
|
|
|
Not implemented in perf. |
|
|
|
HEADER_PMU_MAPPINGS = 16, |
|
|
|
A list of PMU structures, defining the different PMUs supported by perf. |
|
|
|
struct { |
|
uint32_t nr; |
|
struct pmu { |
|
uint32_t pmu_type; |
|
struct perf_header_string pmu_name; |
|
} [nr]; /* Variable length records */ |
|
}; |
|
|
|
HEADER_GROUP_DESC = 17, |
|
|
|
Description of counter groups ({...} in perf syntax) |
|
|
|
struct { |
|
uint32_t nr; |
|
struct { |
|
struct perf_header_string string; |
|
uint32_t leader_idx; |
|
uint32_t nr_members; |
|
} [nr]; /* Variable length records */ |
|
}; |
|
|
|
HEADER_AUXTRACE = 18, |
|
|
|
Define additional auxtrace areas in the perf.data. auxtrace is used to store |
|
undecoded hardware tracing information, such as Intel Processor Trace data. |
|
|
|
/** |
|
* struct auxtrace_index_entry - indexes an AUX area tracing event within a |
|
* perf.data file. |
|
* @file_offset: offset within the perf.data file |
|
* @sz: size of the event |
|
*/ |
|
struct auxtrace_index_entry { |
|
u64 file_offset; |
|
u64 sz; |
|
}; |
|
|
|
#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256 |
|
|
|
/** |
|
* struct auxtrace_index - index of AUX area tracing events within a perf.data |
|
* file. |
|
* @list: linking a number of arrays of entries |
|
* @nr: number of entries |
|
* @entries: array of entries |
|
*/ |
|
struct auxtrace_index { |
|
struct list_head list; |
|
size_t nr; |
|
struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT]; |
|
}; |
|
|
|
HEADER_STAT = 19, |
|
|
|
This is merely a flag signifying that the data section contains data |
|
recorded from perf stat record. |
|
|
|
HEADER_CACHE = 20, |
|
|
|
Description of the cache hierarchy. Based on the Linux sysfs format |
|
in /sys/devices/system/cpu/cpu*/cache/ |
|
|
|
u32 version Currently always 1 |
|
u32 number_of_cache_levels |
|
|
|
struct { |
|
u32 level; |
|
u32 line_size; |
|
u32 sets; |
|
u32 ways; |
|
struct perf_header_string type; |
|
struct perf_header_string size; |
|
struct perf_header_string map; |
|
}[number_of_cache_levels]; |
|
|
|
HEADER_SAMPLE_TIME = 21, |
|
|
|
Two uint64_t for the time of first sample and the time of last sample. |
|
|
|
HEADER_MEM_TOPOLOGY = 22, |
|
|
|
Physical memory map and its node assignments. |
|
|
|
The format of data in MEM_TOPOLOGY is as follows: |
|
|
|
u64 version; // Currently 1 |
|
u64 block_size_bytes; // /sys/devices/system/memory/block_size_bytes |
|
u64 count; // number of nodes |
|
|
|
struct memory_node { |
|
u64 node_id; // node index |
|
u64 size; // size of bitmap |
|
struct bitmap { |
|
/* size of bitmap again */ |
|
u64 bitmapsize; |
|
/* bitmap of memory indexes that belong to the node */ |
|
/* /sys/devices/system/node/node<NODE>/memory<INDEX> */ |
|
u64 entries[(bitmapsize/64)+1]; |
|
} |
|
}[count]; |
|
|
|
The MEM_TOPOLOGY can be displayed with the following command: |
|
|
|
$ perf report --header-only -I |
|
... |
|
# memory nodes (nr 1, block size 0x8000000): |
|
# 0 [7G]: 0-23,32-69 |
|
|
|
HEADER_CLOCKID = 23, |
|
|
|
One uint64_t for the clockid frequency, specified, for instance, via 'perf |
|
record -k' (see clock_gettime()), to enable timestamps derived metrics |
|
conversion into wall clock time on the reporting stage. |
|
|
|
HEADER_DIR_FORMAT = 24, |
|
|
|
The data files layout is described by HEADER_DIR_FORMAT feature. Currently it |
|
holds only version number (1): |
|
|
|
uint64_t version; |
|
|
|
The current version holds only the version value (1), which means that data files: |
|
|
|
- Follow the 'data.*' name format. |
|
|
|
- Contain raw events data in standard perf format as read from kernel (and need |
|
to be sorted) |
|
|
|
Future versions are expected to describe different data files layout according |
|
to special needs. |
|
|
|
HEADER_BPF_PROG_INFO = 25, |
|
|
|
struct bpf_prog_info_linear, which contains detailed information about |
|
a BPF program, including type, id, tag, jited/xlated instructions, etc. |
|
|
|
HEADER_BPF_BTF = 26, |
|
|
|
Contains BPF Type Format (BTF). For more information about BTF, please |
|
refer to Documentation/bpf/btf.rst. |
|
|
|
struct { |
|
u32 id; |
|
u32 data_size; |
|
char data[]; |
|
}; |
|
|
|
HEADER_COMPRESSED = 27, |
|
|
|
struct { |
|
u32 version; |
|
u32 type; |
|
u32 level; |
|
u32 ratio; |
|
u32 mmap_len; |
|
}; |
|
|
|
Indicates that the trace contains records of PERF_RECORD_COMPRESSED type |
|
that have perf_events records in compressed form. |
|
|
|
HEADER_CPU_PMU_CAPS = 28, |
|
|
|
A list of cpu PMU capabilities. The format of data is as below. |
|
|
|
struct { |
|
u32 nr_cpu_pmu_caps; |
|
{ |
|
char name[]; |
|
char value[]; |
|
} [nr_cpu_pmu_caps] |
|
}; |
|
|
|
|
|
Example: |
|
cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake |
|
|
|
HEADER_CLOCK_DATA = 29, |
|
|
|
Contains clock id and its reference time together with wall clock |
|
time taken at the 'same time', both values are in nanoseconds. |
|
The format of data is as below. |
|
|
|
struct { |
|
u32 version; /* version = 1 */ |
|
u32 clockid; |
|
u64 wall_clock_ns; |
|
u64 clockid_time_ns; |
|
}; |
|
|
|
HEADER_HYBRID_TOPOLOGY = 30, |
|
|
|
Indicate the hybrid CPUs. The format of data is as below. |
|
|
|
struct { |
|
u32 nr; |
|
struct { |
|
char pmu_name[]; |
|
char cpus[]; |
|
} [nr]; /* Variable length records */ |
|
}; |
|
|
|
Example: |
|
hybrid cpu system: |
|
cpu_core cpu list : 0-15 |
|
cpu_atom cpu list : 16-23 |
|
|
|
HEADER_HYBRID_CPU_PMU_CAPS = 31, |
|
|
|
A list of hybrid CPU PMU capabilities. |
|
|
|
struct { |
|
u32 nr_pmu; |
|
struct { |
|
u32 nr_cpu_pmu_caps; |
|
{ |
|
char name[]; |
|
char value[]; |
|
} [nr_cpu_pmu_caps]; |
|
char pmu_name[]; |
|
} [nr_pmu]; |
|
}; |
|
|
|
other bits are reserved and should be ignored for now |
|
HEADER_FEAT_BITS = 256, |
|
|
|
Attributes |
|
|
|
This is an array of perf_event_attrs, each attr_size bytes long, which defines |
|
each event collected. See perf_event.h or the man page for a detailed |
|
description. |
|
|
|
Data |
|
|
|
This section is the bulk of the file. It consists of a stream of perf_events |
|
describing events. This matches the format generated by the kernel. |
|
See perf_event.h or the manpage for a detailed description. |
|
|
|
Some notes on parsing: |
|
|
|
Ordering |
|
|
|
The events are not necessarily in time stamp order, as they can be |
|
collected in parallel on different CPUs. If the events should be |
|
processed in time order they need to be sorted first. It is possible |
|
to only do a partial sort using the FINISHED_ROUND event header (see |
|
below). perf record guarantees that there is no reordering over a |
|
FINISHED_ROUND. |
|
|
|
ID vs IDENTIFIER |
|
|
|
When the event stream contains multiple events each event is identified |
|
by an ID. This can be either through the PERF_SAMPLE_ID or the |
|
PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is |
|
at a fixed offset from the event header, which allows reliable |
|
parsing of the header. Relying on ID may be ambiguous. |
|
IDENTIFIER is only supported by newer Linux kernels. |
|
|
|
Perf record specific events: |
|
|
|
In addition to the kernel generated event types perf record adds its |
|
own event types (in addition it also synthesizes some kernel events, |
|
for example MMAP events) |
|
|
|
PERF_RECORD_USER_TYPE_START = 64, |
|
PERF_RECORD_HEADER_ATTR = 64, |
|
|
|
struct attr_event { |
|
struct perf_event_header header; |
|
struct perf_event_attr attr; |
|
uint64_t id[]; |
|
}; |
|
|
|
PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */ |
|
|
|
#define MAX_EVENT_NAME 64 |
|
|
|
struct perf_trace_event_type { |
|
uint64_t event_id; |
|
char name[MAX_EVENT_NAME]; |
|
}; |
|
|
|
struct event_type_event { |
|
struct perf_event_header header; |
|
struct perf_trace_event_type event_type; |
|
}; |
|
|
|
|
|
PERF_RECORD_HEADER_TRACING_DATA = 66, |
|
|
|
Describe me |
|
|
|
struct tracing_data_event { |
|
struct perf_event_header header; |
|
uint32_t size; |
|
}; |
|
|
|
PERF_RECORD_HEADER_BUILD_ID = 67, |
|
|
|
Define an ELF build ID for a referenced executable. |
|
|
|
struct build_id_event; /* See above */ |
|
|
|
PERF_RECORD_FINISHED_ROUND = 68, |
|
|
|
No event reordering over this header. No payload. |
|
|
|
PERF_RECORD_ID_INDEX = 69, |
|
|
|
Map event ids to CPUs and TIDs. |
|
|
|
struct id_index_entry { |
|
uint64_t id; |
|
uint64_t idx; |
|
uint64_t cpu; |
|
uint64_t tid; |
|
}; |
|
|
|
struct id_index_event { |
|
struct perf_event_header header; |
|
uint64_t nr; |
|
struct id_index_entry entries[nr]; |
|
}; |
|
|
|
PERF_RECORD_AUXTRACE_INFO = 70, |
|
|
|
Auxtrace type specific information. Describe me |
|
|
|
struct auxtrace_info_event { |
|
struct perf_event_header header; |
|
uint32_t type; |
|
uint32_t reserved__; /* For alignment */ |
|
uint64_t priv[]; |
|
}; |
|
|
|
PERF_RECORD_AUXTRACE = 71, |
|
|
|
Defines auxtrace data. Followed by the actual data. The contents of |
|
the auxtrace data is dependent on the event and the CPU. For example |
|
for Intel Processor Trace it contains Processor Trace data generated |
|
by the CPU. |
|
|
|
struct auxtrace_event { |
|
struct perf_event_header header; |
|
uint64_t size; |
|
uint64_t offset; |
|
uint64_t reference; |
|
uint32_t idx; |
|
uint32_t tid; |
|
uint32_t cpu; |
|
uint32_t reserved__; /* For alignment */ |
|
}; |
|
|
|
struct aux_event { |
|
struct perf_event_header header; |
|
uint64_t aux_offset; |
|
uint64_t aux_size; |
|
uint64_t flags; |
|
}; |
|
|
|
PERF_RECORD_AUXTRACE_ERROR = 72, |
|
|
|
Describes an error in hardware tracing |
|
|
|
enum auxtrace_error_type { |
|
PERF_AUXTRACE_ERROR_ITRACE = 1, |
|
PERF_AUXTRACE_ERROR_MAX |
|
}; |
|
|
|
#define MAX_AUXTRACE_ERROR_MSG 64 |
|
|
|
struct auxtrace_error_event { |
|
struct perf_event_header header; |
|
uint32_t type; |
|
uint32_t code; |
|
uint32_t cpu; |
|
uint32_t pid; |
|
uint32_t tid; |
|
uint32_t reserved__; /* For alignment */ |
|
uint64_t ip; |
|
char msg[MAX_AUXTRACE_ERROR_MSG]; |
|
}; |
|
|
|
PERF_RECORD_HEADER_FEATURE = 80, |
|
|
|
Describes a header feature. These are records used in pipe-mode that |
|
contain information that otherwise would be in perf.data file's header. |
|
|
|
PERF_RECORD_COMPRESSED = 81, |
|
|
|
struct compressed_event { |
|
struct perf_event_header header; |
|
char data[]; |
|
}; |
|
|
|
The header is followed by a compressed data frame that can be decompressed |
|
into an array of perf trace records. The size of the entire compressed event |
|
record including the header is limited by the max value of header.size. |
|
|
|
Event types |
|
|
|
Define the event attributes with their IDs. |
|
|
|
An array bound by the perf_file_section size. |
|
|
|
struct { |
|
struct perf_event_attr attr; /* Size defined by header.attr_size */ |
|
struct perf_file_section ids; |
|
} |
|
|
|
ids points to an array of uint64_t defining the ids for event attr attr. |
|
|
|
Pipe-mode data |
|
|
|
Pipe-mode avoids seeks in the file by removing the perf_file_section and flags |
|
from the struct perf_header. The trimmed header is: |
|
|
|
struct perf_pipe_file_header { |
|
u64 magic; |
|
u64 size; |
|
}; |
|
|
|
The information about attrs, data, and event_types is instead in the |
|
synthesized events PERF_RECORD_HEADER_ATTR, PERF_RECORD_HEADER_TRACING_DATA, |
|
PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE |
|
that are generated by perf record in pipe-mode. |
|
|
|
|
|
References: |
|
|
|
include/uapi/linux/perf_event.h |
|
|
|
This is the canonical description of the kernel generated perf_events |
|
and the perf_event_attrs. |
|
|
|
perf_events manpage |
|
|
|
A manpage describing perf_event and perf_event_attr is here: |
|
http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html |
|
This tends to be slightly behind the kernel include, but has better |
|
descriptions. A (typically older) version of the man page may be |
|
included with the standard Linux man pages, available with "man |
|
perf_events" |
|
|
|
pmu-tools |
|
|
|
https://github.com/andikleen/pmu-tools/tree/master/parser |
|
|
|
A definition of the perf.data format in python "construct" format is available |
|
in pmu-tools parser. This allows reading perf.data from python and dumping it. |
|
|
|
quipper |
|
|
|
The quipper C++ parser is available at |
|
http://github.com/google/perf_data_converter/tree/master/src/quipper |
|
|
|
|