forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
637 lines
14 KiB
637 lines
14 KiB
/* |
|
* raspberrypi_axi_monitor.c |
|
* |
|
* Author: [email protected] |
|
* |
|
* Raspberry Pi AXI performance counters. |
|
* |
|
* Copyright (C) 2017 Raspberry Pi Trading Ltd. |
|
* |
|
* This program is free software; you can redistribute it and/or modify |
|
* it under the terms of the GNU General Public License version 2 as |
|
* published by the Free Software Foundation. |
|
*/ |
|
|
|
#include <linux/debugfs.h> |
|
#include <linux/devcoredump.h> |
|
#include <linux/device.h> |
|
#include <linux/kthread.h> |
|
#include <linux/module.h> |
|
#include <linux/netdevice.h> |
|
#include <linux/mutex.h> |
|
#include <linux/of.h> |
|
#include <linux/platform_device.h> |
|
|
|
#include <soc/bcm2835/raspberrypi-firmware.h> |
|
|
|
/* Number of AXI monitors handled by this driver (System and VPU) */
#define NUM_MONITORS 2

/* Number of bus watchers that can be programmed inside one monitor */
#define NUM_BUS_WATCHERS_PER_MONITOR 3

/* Indices of the two monitors in the per-monitor state array */
#define SYSTEM_MONITOR 0
#define VPU_MONITOR 1

/* Number of slots in each monitor's per-bus result table */
#define MAX_BUSES 16

/* Default sample time; the value is handed to msleep(), so milliseconds */
#define DEFAULT_SAMPLE_TIME 100

/* Number of u32 counters read back from a single bus watcher */
#define NUM_BUS_WATCHER_RESULTS 9
|
|
|
struct bus_watcher_data { |
|
union { |
|
u32 results[NUM_BUS_WATCHER_RESULTS]; |
|
struct { |
|
u32 atrans; |
|
u32 atwait; |
|
u32 amax; |
|
u32 wtrans; |
|
u32 wtwait; |
|
u32 wmax; |
|
u32 rtrans; |
|
u32 rtwait; |
|
u32 rmax; |
|
}; |
|
}; |
|
}; |
|
|
|
|
|
struct rpi_axiperf { |
|
struct platform_device *dev; |
|
struct dentry *root_folder; |
|
|
|
struct task_struct *monitor_thread; |
|
struct mutex lock; |
|
|
|
struct rpi_firmware *firmware; |
|
|
|
/* Sample time spent on for each bus */ |
|
int sample_time; |
|
|
|
/* Now storage for the per monitor settings and the resulting |
|
* performance figures |
|
*/ |
|
struct { |
|
/* Bit field of buses we want to monitor */ |
|
int bus_enabled; |
|
/* Bit field of buses to filter by */ |
|
int bus_filter; |
|
/* The current buses being monitored on this monitor */ |
|
int current_bus[NUM_BUS_WATCHERS_PER_MONITOR]; |
|
/* The last bus monitored on this monitor */ |
|
int last_monitored; |
|
|
|
/* Set true if this mailbox must use the mailbox interface |
|
* rather than access registers directly. |
|
*/ |
|
int use_mailbox_interface; |
|
|
|
/* Current result values */ |
|
struct bus_watcher_data results[MAX_BUSES]; |
|
|
|
struct dentry *debugfs_entry; |
|
void __iomem *base_address; |
|
|
|
} monitor[NUM_MONITORS]; |
|
|
|
}; |
|
|
|
static struct rpi_axiperf *state; |
|
|
|
/* Two monitors, System and VPU, each with the following register sets.
 * Each monitor has three bus watchers, so it can observe at most three
 * buses at a time; the enabled buses are time shared, each group getting
 * 100ms (default, settable via debugfs) of time on its associated monitor.
 * Results from the three bus watchers per monitor are recorded and exposed
 * through debugfs.
 */
|
|
|
/* general registers */ |
|
const int GEN_CTRL; |
|
|
|
const int GEN_CTL_ENABLE_BIT = BIT(0); |
|
const int GEN_CTL_RESET_BIT = BIT(1); |
|
|
|
/* Bus watcher registers */ |
|
const int BW_PITCH = 0x40; |
|
|
|
const int BW0_CTRL = 0x40; |
|
const int BW1_CTRL = 0x80; |
|
const int BW2_CTRL = 0xc0; |
|
|
|
const int BW_ATRANS_OFFSET = 0x04; |
|
const int BW_ATWAIT_OFFSET = 0x08; |
|
const int BW_AMAX_OFFSET = 0x0c; |
|
const int BW_WTRANS_OFFSET = 0x10; |
|
const int BW_WTWAIT_OFFSET = 0x14; |
|
const int BW_WMAX_OFFSET = 0x18; |
|
const int BW_RTRANS_OFFSET = 0x1c; |
|
const int BW_RTWAIT_OFFSET = 0x20; |
|
const int BW_RMAX_OFFSET = 0x24; |
|
|
|
const int BW_CTRL_RESET_BIT = BIT(31); |
|
const int BW_CTRL_ENABLE_BIT = BIT(30); |
|
const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29); |
|
const int BW_CTRL_LIMIT_HALT_BIT = BIT(28); |
|
|
|
const int BW_CTRL_SOURCE_SHIFT = 8; |
|
const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits |
|
const int BW_CTRL_BUS_WATCH_SHIFT; |
|
const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits |
|
const int BW_CTRL_BUS_FILTER_SHIFT = 8; |
|
|
|
/*
 * Names of the transaction sources that can be selected as a filter,
 * indexed by the 5-bit filter value. Entry 0 is the empty string.
 * Both the strings and the pointer table are read-only.
 */
static const char * const bus_filter_strings[] = {
	"",
	"CORE0_V",
	"ICACHE0",
	"DCACHE0",
	"CORE1_V",
	"ICACHE1",
	"DCACHE1",
	"L2_MAIN",
	"HOST_PORT",
	"HOST_PORT2",
	"HVS",
	"ISP",
	"VIDEO_DCT",
	"VIDEO_SD2AXI",
	"CAM0",
	"CAM1",
	"DMA0",
	"DMA1",
	"DMA2_VPU",
	"JPEG",
	"VIDEO_CME",
	"TRANSPOSER",
	"VIDEO_FME",
	"CCP2TX",
	"USB",
	"V3D0",
	"V3D1",
	"V3D2",
	"AVE",
	"DEBUG",
	"CPU",
	"M30"
};
|
|
|
/* Number of entries in bus_filter_strings; private to this file */
static const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
|
|
|
/*
 * Display names of the buses on the System monitor, indexed by bus number.
 * Both the strings and the pointer table are read-only.
 */
static const char * const system_bus_string[] = {
	"DMA_L2",
	"TRANS",
	"JPEG",
	"SYSTEM_UC",
	"DMA_UC",
	"SYSTEM_L2",
	"CCP2TX",
	"MPHI_RX",
	"MPHI_TX",
	"HVS",
	"H264",
	"ISP",
	"V3D",
	"PERIPHERAL",
	"CPU_UC",
	"CPU_L2"
};
|
|
|
/* Number of entries in system_bus_string; private to this file */
static const int num_system_buses = ARRAY_SIZE(system_bus_string);
|
|
|
/*
 * Display names of the buses on the VPU monitor, indexed by bus number.
 * Both the strings and the pointer table are read-only.
 */
static const char * const vpu_bus_string[] = {
	"VPU1_D_L2",
	"VPU0_D_L2",
	"VPU1_I_L2",
	"VPU0_I_L2",
	"SYSTEM_L2",
	"L2_FLUSH",
	"DMA_L2",
	"VPU1_D_UC",
	"VPU0_D_UC",
	"VPU1_I_UC",
	"VPU0_I_UC",
	"SYSTEM_UC",
	"L2_OUT",
	"DMA_UC",
	"SDRAM",
	"L2_IN"
};
|
|
|
/* Number of entries in vpu_bus_string; private to this file */
static const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
|
|
|
/*
 * Name of each monitor, indexed by SYSTEM_MONITOR / VPU_MONITOR; used as the
 * name of the monitor's debugfs directory.
 * Both the strings and the pointer table are read-only.
 */
static const char * const monitor_name[] = {
	"System",
	"VPU"
};
|
|
|
/* Write @value to the register at byte offset @reg of monitor @monitor. */
static inline void write_reg(int monitor, int reg, u32 value)
{
	void __iomem *addr = state->monitor[monitor].base_address + reg;

	writel(value, addr);
}
|
|
|
/* Return the register at byte offset @reg of monitor @monitor. */
static inline u32 read_reg(int monitor, u32 reg)
{
	void __iomem *addr = state->monitor[monitor].base_address + reg;

	return readl(addr);
}
|
|
|
/*
 * Fetch the NUM_BUS_WATCHER_RESULTS counters of one bus watcher.
 *
 * @monitor: index of the monitor the watcher belongs to
 * @watcher: byte offset of the watcher's control register in that monitor
 * @results: destination for the counter values
 *
 * Monitors flagged use_mailbox_interface are read through the firmware; if
 * that request fails, an error is logged once and @results is left untouched.
 */
static void read_bus_watcher(int monitor, int watcher, u32 *results)
{
	/* Address of the first counter register of this watcher */
	void __iomem *first = state->monitor[monitor].base_address
			      + watcher + BW_ATRANS_OFFSET;
	/* Mailbox message: start address, register count, then the values */
	u32 msg[2 + NUM_BUS_WATCHER_RESULTS];
	int err;
	int i;

	if (!state->monitor[monitor].use_mailbox_interface) {
		/* Direct access: the counters are consecutive 32-bit words */
		for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++)
			results[i] = readl(first + i * 4);
		return;
	}

	msg[0] = (u32)(uintptr_t)first;
	msg[1] = NUM_BUS_WATCHER_RESULTS;

	err = rpi_firmware_property(state->firmware,
				    RPI_FIRMWARE_GET_PERIPH_REG,
				    msg, sizeof(msg));

	if (err >= 0 && msg[1] == NUM_BUS_WATCHER_RESULTS) {
		memcpy(results, &msg[2],
		       NUM_BUS_WATCHER_RESULTS * sizeof(u32));
		return;
	}

	dev_err_once(&state->dev->dev,
		     "Failed to read bus watcher");
}
|
|
|
/*
 * Write @set to the general control register of monitor @monitor, either
 * directly or, for monitors flagged use_mailbox_interface, through the
 * firmware. A failed firmware request is logged once.
 */
static void set_monitor_control(int monitor, u32 set)
{
	/* Mailbox message: register address, register count, value */
	u32 msg[3];
	int err;

	if (!state->monitor[monitor].use_mailbox_interface) {
		write_reg(monitor, GEN_CTRL, set);
		return;
	}

	msg[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address +
				  GEN_CTRL);
	msg[1] = 1;
	msg[2] = set;

	err = rpi_firmware_property(state->firmware,
				    RPI_FIRMWARE_SET_PERIPH_REG,
				    msg, sizeof(msg));

	if (err < 0 || msg[1] != 1)
		dev_err_once(&state->dev->dev,
			     "Failed to set monitor control");
}
|
|
|
/*
 * Write @set to a bus watcher control register.
 *
 * @monitor: index of the monitor the watcher belongs to
 * @watcher: byte offset of the watcher's control register in that monitor
 * @set:     value to write
 *
 * Monitors flagged use_mailbox_interface are written through the firmware;
 * a failed firmware request is logged once.
 */
static void set_bus_watcher_control(int monitor, int watcher, u32 set)
{
	/* Mailbox message: register address, register count, value */
	u32 msg[3];
	int err;

	if (!state->monitor[monitor].use_mailbox_interface) {
		write_reg(monitor, watcher, set);
		return;
	}

	msg[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address +
				  watcher);
	msg[1] = 1;
	msg[2] = set;

	err = rpi_firmware_property(state->firmware,
				    RPI_FIRMWARE_SET_PERIPH_REG,
				    msg, sizeof(msg));

	if (err < 0 || msg[1] != 1)
		dev_err_once(&state->dev->dev,
			     "Failed to set bus watcher control");
}
|
|
|
static void monitor(struct rpi_axiperf *state) |
|
{ |
|
int monitor, num_buses[NUM_MONITORS]; |
|
|
|
mutex_lock(&state->lock); |
|
|
|
for (monitor = 0; monitor < NUM_MONITORS; monitor++) { |
|
typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); |
|
|
|
/* Anything enabled? */ |
|
if (mon->bus_enabled == 0) { |
|
/* No, disable all monitoring for this monitor */ |
|
set_monitor_control(monitor, GEN_CTL_RESET_BIT); |
|
} else { |
|
int i; |
|
|
|
/* Find out how many busses we want to monitor, and |
|
* spread our 3 actual monitors over them |
|
*/ |
|
num_buses[monitor] = hweight32(mon->bus_enabled); |
|
num_buses[monitor] = min(num_buses[monitor], |
|
NUM_BUS_WATCHERS_PER_MONITOR); |
|
|
|
for (i = 0; i < num_buses[monitor]; i++) { |
|
int bus_control; |
|
|
|
do { |
|
mon->last_monitored++; |
|
mon->last_monitored &= 0xf; |
|
} while ((mon->bus_enabled & |
|
(1 << mon->last_monitored)) == 0); |
|
|
|
mon->current_bus[i] = mon->last_monitored; |
|
|
|
/* Reset the counters */ |
|
set_bus_watcher_control(monitor, |
|
BW0_CTRL + |
|
i*BW_PITCH, |
|
BW_CTRL_RESET_BIT); |
|
|
|
bus_control = BW_CTRL_ENABLE_BIT | |
|
mon->current_bus[i]; |
|
|
|
if (mon->bus_filter) { |
|
bus_control |= |
|
BW_CTRL_ENABLE_ID_FILTER_BIT; |
|
bus_control |= |
|
((mon->bus_filter & 0x1f) |
|
<< BW_CTRL_BUS_FILTER_SHIFT); |
|
} |
|
|
|
// Start capture |
|
set_bus_watcher_control(monitor, |
|
BW0_CTRL + i*BW_PITCH, |
|
bus_control); |
|
} |
|
} |
|
|
|
/* start monitoring */ |
|
set_monitor_control(monitor, GEN_CTL_ENABLE_BIT); |
|
} |
|
|
|
mutex_unlock(&state->lock); |
|
|
|
msleep(state->sample_time); |
|
|
|
/* Now read the results */ |
|
|
|
mutex_lock(&state->lock); |
|
for (monitor = 0; monitor < NUM_MONITORS; monitor++) { |
|
typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); |
|
|
|
/* Anything enabled? */ |
|
if (mon->bus_enabled == 0) { |
|
/* No, disable all monitoring for this monitor */ |
|
set_monitor_control(monitor, 0); |
|
} else { |
|
int i; |
|
|
|
for (i = 0; i < num_buses[monitor]; i++) { |
|
int bus = mon->current_bus[i]; |
|
|
|
read_bus_watcher(monitor, |
|
BW0_CTRL + i*BW_PITCH, |
|
(u32 *)&mon->results[bus].results); |
|
} |
|
} |
|
} |
|
mutex_unlock(&state->lock); |
|
} |
|
|
|
/*
 * Body of the sampling kthread.
 *
 * @data: the struct rpi_axiperf passed at thread creation
 *
 * Runs at least one sampling cycle, then keeps sampling until the thread is
 * asked to stop. Always returns 0.
 */
static int monitor_thread(void *data)
{
	struct rpi_axiperf *axiperf = data;

	do {
		monitor(axiperf);
	} while (!kthread_should_stop());

	return 0;
}
|
|
|
/*
 * debugfs read handler for a monitor's "data" file.
 *
 * Formats the most recent counters of every enabled bus of the monitor
 * selected by fp->private_data into a temporary buffer and copies the
 * requested window of it to user space.
 *
 * Returns the number of bytes copied, -ENOMEM if the temporary buffer cannot
 * be allocated, or the error reported by simple_read_from_buffer().
 */
static ssize_t myreader(struct file *fp, char __user *user_buffer,
			size_t count, loff_t *position)
{
#define INIT_BUFF_SIZE 2048
#define DIVIDER (1024)

	int idx = (int)(uintptr_t)(fp->private_data);
	typeof(state->monitor[0]) *mon;
	const char * const *bus_names;
	char *string_buffer;
	size_t len = 0;
	ssize_t ret;
	int num_buses;
	int filter;
	int i;

	/* Validate the index before it is used to pick a monitor */
	if (idx < 0 || idx >= NUM_MONITORS)
		idx = 0;

	mon = &(state->monitor[idx]);

	if (idx == SYSTEM_MONITOR) {
		bus_names = system_bus_string;
		num_buses = num_system_buses;
	} else {
		bus_names = vpu_bus_string;
		num_buses = num_vpu_buses;
	}

	string_buffer = kmalloc(INIT_BUFF_SIZE, GFP_KERNEL);
	if (!string_buffer) {
		dev_err(&state->dev->dev,
			"Failed temporary string allocation\n");
		return -ENOMEM;
	}

	/*
	 * scnprintf() returns the number of characters actually stored, so
	 * 'len' can never move past the end of the buffer. Once the buffer
	 * is full, further calls simply add nothing.
	 */
	mutex_lock(&state->lock);

	/* Read once: the value can be changed through debugfs at any time */
	filter = mon->bus_filter;
	if (filter) {
		int filt = min(filter & 0x1f, num_bus_filters - 1);

		len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
				 "\nMonitoring transactions from %s only\n",
				 bus_filter_strings[filt]);
	}

	/* NOTE(review): the header columns are not padded to the widths of
	 * the rows below - confirm whether spacing was lost from this string.
	 */
	len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
			 " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n"
			 "======================================================================================================\n");

	for (i = 0; i < num_buses; i++) {
		typeof(mon->results[0]) *res = &(mon->results[i]);

		if (!(mon->bus_enabled & (1 << i)))
			continue;

		/* Counters are shown in units of DIVIDER */
		len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
				 "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
				 bus_names[i],
				 res->atrans/DIVIDER,
				 res->atwait/DIVIDER,
				 res->amax/DIVIDER,
				 res->wtrans/DIVIDER,
				 res->wtwait/DIVIDER,
				 res->wmax/DIVIDER,
				 res->rtrans/DIVIDER,
				 res->rtwait/DIVIDER,
				 res->rmax/DIVIDER);
	}

	/* Single exit from the locked region: the lock is always released */
	mutex_unlock(&state->lock);

	ret = simple_read_from_buffer(user_buffer, count, position,
				      string_buffer, len);

	kfree(string_buffer);

	return ret;
}
|
|
|
/*
 * debugfs write handler for a monitor's "data" file.
 *
 * Currently a stub: the written data is ignored and the whole write is
 * reported as consumed. Returns @count.
 */
static ssize_t mywriter(struct file *fp, const char __user *user_buffer,
			size_t count, loff_t *position)
{
	int idx = (int)(uintptr_t)(fp->private_data);

	/* Valid indices are 0..NUM_MONITORS-1; fall back to the first one */
	if (idx < 0 || idx >= NUM_MONITORS)
		idx = 0;

	/* At the moment, this does nothing, but in the future it could be
	 * used to reset counters etc
	 */
	return count;
}
|
|
|
static const struct file_operations fops_debug = { |
|
.read = myreader, |
|
.write = mywriter, |
|
.open = simple_open |
|
}; |
|
|
|
static int rpi_axiperf_probe(struct platform_device *pdev) |
|
{ |
|
int ret = 0, i; |
|
struct device *dev = &pdev->dev; |
|
struct device_node *np = dev->of_node; |
|
struct device_node *fw_node; |
|
|
|
state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL); |
|
if (!state) |
|
return -ENOMEM; |
|
|
|
/* Get the firmware handle for future rpi-firmware-xxx calls */ |
|
fw_node = of_parse_phandle(np, "firmware", 0); |
|
if (!fw_node) { |
|
dev_err(dev, "Missing firmware node\n"); |
|
return -ENOENT; |
|
} |
|
|
|
state->firmware = rpi_firmware_get(fw_node); |
|
if (!state->firmware) |
|
return -EPROBE_DEFER; |
|
|
|
/* Special case for the VPU monitor, we must use the mailbox interface |
|
* as it is not accessible from the ARM address space. |
|
*/ |
|
state->monitor[VPU_MONITOR].use_mailbox_interface = 1; |
|
state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0; |
|
|
|
for (i = 0; i < NUM_MONITORS; i++) { |
|
if (state->monitor[i].use_mailbox_interface) { |
|
of_property_read_u32_index(np, "reg", i*2, |
|
(u32 *)(&state->monitor[i].base_address)); |
|
} else { |
|
struct resource *resource = |
|
platform_get_resource(pdev, IORESOURCE_MEM, i); |
|
|
|
state->monitor[i].base_address = |
|
devm_ioremap_resource(&pdev->dev, resource); |
|
} |
|
|
|
if (IS_ERR(state->monitor[i].base_address)) |
|
return PTR_ERR(state->monitor[i].base_address); |
|
|
|
/* Enable all buses by default */ |
|
state->monitor[i].bus_enabled = 0xffff; |
|
} |
|
|
|
state->dev = pdev; |
|
platform_set_drvdata(pdev, state); |
|
|
|
state->sample_time = DEFAULT_SAMPLE_TIME; |
|
|
|
/* Set up all the debugfs stuff */ |
|
state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL); |
|
|
|
for (i = 0; i < NUM_MONITORS; i++) { |
|
state->monitor[i].debugfs_entry = |
|
debugfs_create_dir(monitor_name[i], state->root_folder); |
|
if (IS_ERR(state->monitor[i].debugfs_entry)) |
|
state->monitor[i].debugfs_entry = NULL; |
|
|
|
debugfs_create_file("data", 0444, |
|
state->monitor[i].debugfs_entry, |
|
(void *)(uintptr_t)i, &fops_debug); |
|
debugfs_create_u32("enable", 0644, |
|
state->monitor[i].debugfs_entry, |
|
&state->monitor[i].bus_enabled); |
|
debugfs_create_u32("filter", 0644, |
|
state->monitor[i].debugfs_entry, |
|
&state->monitor[i].bus_filter); |
|
debugfs_create_u32("sample_time", 0644, |
|
state->monitor[i].debugfs_entry, |
|
&state->sample_time); |
|
} |
|
|
|
mutex_init(&state->lock); |
|
|
|
state->monitor_thread = kthread_run(monitor_thread, state, |
|
"rpi-axiperfmon"); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
static int rpi_axiperf_remove(struct platform_device *dev) |
|
{ |
|
int ret = 0; |
|
|
|
kthread_stop(state->monitor_thread); |
|
|
|
debugfs_remove_recursive(state->root_folder); |
|
state->root_folder = NULL; |
|
|
|
return ret; |
|
} |
|
|
|
/* Device tree compatible strings this driver binds to */
static const struct of_device_id rpi_axiperf_match[] = {
	{ .compatible = "brcm,bcm2835-axiperf" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
|
|
|
static struct platform_driver rpi_axiperf_driver = { |
|
.probe = rpi_axiperf_probe, |
|
.remove = rpi_axiperf_remove, |
|
.driver = { |
|
.name = "rpi-bcm2835-axiperf", |
|
.of_match_table = of_match_ptr(rpi_axiperf_match), |
|
}, |
|
}; |
|
|
|
module_platform_driver(rpi_axiperf_driver); |
|
|
|
/* Module information */ |
|
MODULE_AUTHOR("James Hughes <[email protected]>"); |
|
MODULE_DESCRIPTION("RPI AXI Performance monitor driver"); |
|
MODULE_LICENSE("GPL"); |
|
|
|
|