QortalOS Brooklyn for Raspberry Pi 4
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

637 lines
14 KiB

/*
* raspberrypi_axi_monitor.c
*
* Author: [email protected]
*
* Raspberry Pi AXI performance counters.
*
* Copyright (C) 2017 Raspberry Pi Trading Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/debugfs.h>
#include <linux/devcoredump.h>
#include <linux/device.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <soc/bcm2835/raspberrypi-firmware.h>
/* Number of AXI monitors handled by this driver (sizes rpi_axiperf.monitor[]) */
#define NUM_MONITORS 2
/* Bus watchers available in each monitor (sizes current_bus[]) */
#define NUM_BUS_WATCHERS_PER_MONITOR 3
/* Indices into rpi_axiperf.monitor[] */
#define SYSTEM_MONITOR 0
#define VPU_MONITOR 1
/* Number of buses per monitor for which results are stored (sizes results[]) */
#define MAX_BUSES 16
/* Default sampling period; the value is handed to msleep(), i.e. milliseconds */
#define DEFAULT_SAMPLE_TIME 100
/* Number of u32 counters read back from one bus watcher */
#define NUM_BUS_WATCHER_RESULTS 9
/*
 * Counters captured from one bus watcher.
 *
 * The anonymous union lets the same NUM_BUS_WATCHER_RESULTS words be
 * accessed either as a flat array (filled in one go by read_bus_watcher())
 * or by name (used when formatting the debugfs table). The named members
 * follow the order of the BW_*_OFFSET registers, ATRANS first, RMAX last.
 */
struct bus_watcher_data {
union {
/* Raw view: all counters in register order */
u32 results[NUM_BUS_WATCHER_RESULTS];
/* Named view of the same words */
struct {
u32 atrans;
u32 atwait;
u32 amax;
u32 wtrans;
u32 wtwait;
u32 wmax;
u32 rtrans;
u32 rtwait;
u32 rmax;
};
};
};
/*
 * Driver state: one instance is allocated in probe and published through
 * the file-scope 'state' pointer below.
 */
struct rpi_axiperf {
/* Owning platform device; used for dev_err() reporting */
struct platform_device *dev;
/* Top-level debugfs directory for this driver */
struct dentry *root_folder;
/* Kernel thread that repeatedly runs monitor() */
struct task_struct *monitor_thread;
/* Taken by monitor() and by the debugfs "data" reader */
struct mutex lock;
/* Firmware handle used for mailbox register access */
struct rpi_firmware *firmware;
/* Sample time spent on each bus; passed to msleep() */
int sample_time;
/* Now storage for the per monitor settings and the resulting
 * performance figures
 */
struct {
/* Bit field of buses we want to monitor */
int bus_enabled;
/* Bit field of buses to filter by */
int bus_filter;
/* The current buses being monitored on this monitor */
int current_bus[NUM_BUS_WATCHERS_PER_MONITOR];
/* The last bus monitored on this monitor */
int last_monitored;
/* Set true if this monitor must use the mailbox interface
 * rather than access registers directly.
 */
int use_mailbox_interface;
/* Current result values, indexed by bus number */
struct bus_watcher_data results[MAX_BUSES];
/* Per-monitor debugfs directory */
struct dentry *debugfs_entry;
/* Register base: an ioremap()ed pointer for direct access, or the
 * raw address taken from the device tree "reg" property when the
 * mailbox interface is used.
 */
void __iomem *base_address;
} monitor[NUM_MONITORS];
};
/* Single driver instance; the register helpers and debugfs handlers use it */
static struct rpi_axiperf *state;
/* Two monitors, System and VPU, each with the following register sets.
* Each monitor has three bus watchers and each watcher can only observe one
* bus at a time, so we time share them, giving each selected bus 100ms
* (default, settable via debugfs) of time on its associated monitor.
* The results from the three bus watchers per monitor are recorded and
* exposed through debugfs.
*/
/*
 * Register map of one AXI monitor. All offsets are relative to the
 * monitor's base address.
 */

/* General control register and its bits */
const int GEN_CTRL = 0x00;
const int GEN_CTL_ENABLE_BIT = BIT(0);
const int GEN_CTL_RESET_BIT = BIT(1);

/*
 * Bus watcher register banks: three banks, BW_PITCH bytes apart, each
 * starting with its control register.
 */
const int BW_PITCH = 0x40;
const int BW0_CTRL = 0x40;
const int BW1_CTRL = 0x80;
const int BW2_CTRL = 0xc0;

/* Result registers, as offsets from a bank's control register */
const int BW_ATRANS_OFFSET = 0x04;
const int BW_ATWAIT_OFFSET = 0x08;
const int BW_AMAX_OFFSET = 0x0c;
const int BW_WTRANS_OFFSET = 0x10;
const int BW_WTWAIT_OFFSET = 0x14;
const int BW_WMAX_OFFSET = 0x18;
const int BW_RTRANS_OFFSET = 0x1c;
const int BW_RTWAIT_OFFSET = 0x20;
const int BW_RMAX_OFFSET = 0x24;

/* Bus watcher control register: flag bits */
const int BW_CTRL_RESET_BIT = BIT(31);
const int BW_CTRL_ENABLE_BIT = BIT(30);
const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29);
const int BW_CTRL_LIMIT_HALT_BIT = BIT(28);

/* Bus watcher control register: fields */
const int BW_CTRL_SOURCE_SHIFT = 8;
const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); /* 5 bits */
const int BW_CTRL_BUS_WATCH_SHIFT = 0;
const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); /* 6 bits */
const int BW_CTRL_BUS_FILTER_SHIFT = 8;
/*
 * Names of the transaction sources that can be selected as a filter,
 * indexed by the 5-bit filter value (bus_filter & 0x1f). Entry 0 is empty.
 */
static const char * const bus_filter_strings[] = {
	"",
	"CORE0_V",
	"ICACHE0",
	"DCACHE0",
	"CORE1_V",
	"ICACHE1",
	"DCACHE1",
	"L2_MAIN",
	"HOST_PORT",
	"HOST_PORT2",
	"HVS",
	"ISP",
	"VIDEO_DCT",
	"VIDEO_SD2AXI",
	"CAM0",
	"CAM1",
	"DMA0",
	"DMA1",
	"DMA2_VPU",
	"JPEG",
	"VIDEO_CME",
	"TRANSPOSER",
	"VIDEO_FME",
	"CCP2TX",
	"USB",
	"V3D0",
	"V3D1",
	"V3D2",
	"AVE",
	"DEBUG",
	"CPU",
	"M30"
};
/* Number of entries in bus_filter_strings[] */
const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
/*
 * Display names of the buses on the System monitor, indexed by bus
 * number (the bit position in bus_enabled).
 */
static const char * const system_bus_string[] = {
	"DMA_L2",
	"TRANS",
	"JPEG",
	"SYSTEM_UC",
	"DMA_UC",
	"SYSTEM_L2",
	"CCP2TX",
	"MPHI_RX",
	"MPHI_TX",
	"HVS",
	"H264",
	"ISP",
	"V3D",
	"PERIPHERAL",
	"CPU_UC",
	"CPU_L2"
};
/* Number of entries in system_bus_string[] */
const int num_system_buses = ARRAY_SIZE(system_bus_string);
/*
 * Display names of the buses on the VPU monitor, indexed by bus number
 * (the bit position in bus_enabled).
 */
static const char * const vpu_bus_string[] = {
	"VPU1_D_L2",
	"VPU0_D_L2",
	"VPU1_I_L2",
	"VPU0_I_L2",
	"SYSTEM_L2",
	"L2_FLUSH",
	"DMA_L2",
	"VPU1_D_UC",
	"VPU0_D_UC",
	"VPU1_I_UC",
	"VPU0_I_UC",
	"SYSTEM_UC",
	"L2_OUT",
	"DMA_UC",
	"SDRAM",
	"L2_IN"
};
/* Number of entries in vpu_bus_string[] */
const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
/*
 * debugfs directory names, indexed by monitor number
 * (SYSTEM_MONITOR, VPU_MONITOR).
 */
static const char * const monitor_name[] = {
	"System",
	"VPU"
};
/*
 * Write @value to the memory-mapped register at byte offset @reg of
 * monitor number @monitor.
 */
static inline void write_reg(int monitor, int reg, u32 value)
{
	void __iomem *target = state->monitor[monitor].base_address + reg;

	writel(value, target);
}
/*
 * Return the contents of the memory-mapped register at byte offset @reg
 * of monitor number @monitor.
 */
static inline u32 read_reg(int monitor, u32 reg)
{
	void __iomem *source = state->monitor[monitor].base_address + reg;

	return readl(source);
}
/*
 * Read the NUM_BUS_WATCHER_RESULTS counters of one bus watcher.
 *
 * @monitor: index into state->monitor[]
 * @watcher: offset of the watcher's control register (BW0_CTRL, ...)
 * @results: destination for NUM_BUS_WATCHER_RESULTS u32 values
 *
 * The counters are read starting at BW_ATRANS_OFFSET within the watcher's
 * register bank. Depending on the monitor this is done either through the
 * firmware mailbox or by direct MMIO reads. If the mailbox request fails,
 * an error is logged (once) and @results is left unchanged.
 */
static void read_bus_watcher(int monitor, int watcher, u32 *results)
{
	void __iomem *first = state->monitor[monitor].base_address
				+ watcher + BW_ATRANS_OFFSET;

	if (state->monitor[monitor].use_mailbox_interface) {
		/* Request layout: start address, word count, then the
		 * result words. Zero the buffer so that no uninitialised
		 * stack contents are handed to the firmware.
		 */
		u32 tmp[2 + NUM_BUS_WATCHER_RESULTS] = { 0 };
		int err;

		tmp[0] = (u32)(uintptr_t)first;
		tmp[1] = NUM_BUS_WATCHER_RESULTS;
		err = rpi_firmware_property(state->firmware,
					    RPI_FIRMWARE_GET_PERIPH_REG,
					    tmp, sizeof(tmp));
		if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS)
			dev_err_once(&state->dev->dev,
				     "Failed to read bus watcher\n");
		else
			memcpy(results, &tmp[2],
			       NUM_BUS_WATCHER_RESULTS * sizeof(u32));
	} else {
		void __iomem *addr = first;
		int i;

		for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4)
			*results++ = readl(addr);
	}
}
/*
 * Write @set to the general control register (GEN_CTRL) of monitor
 * number @monitor, via the firmware mailbox or by direct MMIO depending
 * on the monitor. A failed mailbox request is logged once.
 */
static void set_monitor_control(int monitor, u32 set)
{
	if (state->monitor[monitor].use_mailbox_interface) {
		/* Request layout: register address, word count, value */
		u32 tmp[3] = {
			(u32)(uintptr_t)(state->monitor[monitor].base_address +
					 GEN_CTRL),
			1,
			set
		};
		int err = rpi_firmware_property(state->firmware,
						RPI_FIRMWARE_SET_PERIPH_REG,
						tmp, sizeof(tmp));

		if (err < 0 || tmp[1] != 1)
			dev_err_once(&state->dev->dev,
				     "Failed to set monitor control\n");
	} else {
		write_reg(monitor, GEN_CTRL, set);
	}
}
/*
 * Write @set to a bus watcher control register of monitor number
 * @monitor. @watcher is the register's byte offset (BW0_CTRL, ...).
 * The write goes through the firmware mailbox or direct MMIO depending
 * on the monitor. A failed mailbox request is logged once.
 */
static void set_bus_watcher_control(int monitor, int watcher, u32 set)
{
	if (state->monitor[monitor].use_mailbox_interface) {
		/* Request layout: register address, word count, value */
		u32 tmp[3] = {
			(u32)(uintptr_t)(state->monitor[monitor].base_address +
					 watcher),
			1,
			set
		};
		int err = rpi_firmware_property(state->firmware,
						RPI_FIRMWARE_SET_PERIPH_REG,
						tmp, sizeof(tmp));

		if (err < 0 || tmp[1] != 1)
			dev_err_once(&state->dev->dev,
				     "Failed to set bus watcher control\n");
	} else {
		write_reg(monitor, watcher, set);
	}
}
static void monitor(struct rpi_axiperf *state)
{
int monitor, num_buses[NUM_MONITORS];
mutex_lock(&state->lock);
for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
/* Anything enabled? */
if (mon->bus_enabled == 0) {
/* No, disable all monitoring for this monitor */
set_monitor_control(monitor, GEN_CTL_RESET_BIT);
} else {
int i;
/* Find out how many busses we want to monitor, and
* spread our 3 actual monitors over them
*/
num_buses[monitor] = hweight32(mon->bus_enabled);
num_buses[monitor] = min(num_buses[monitor],
NUM_BUS_WATCHERS_PER_MONITOR);
for (i = 0; i < num_buses[monitor]; i++) {
int bus_control;
do {
mon->last_monitored++;
mon->last_monitored &= 0xf;
} while ((mon->bus_enabled &
(1 << mon->last_monitored)) == 0);
mon->current_bus[i] = mon->last_monitored;
/* Reset the counters */
set_bus_watcher_control(monitor,
BW0_CTRL +
i*BW_PITCH,
BW_CTRL_RESET_BIT);
bus_control = BW_CTRL_ENABLE_BIT |
mon->current_bus[i];
if (mon->bus_filter) {
bus_control |=
BW_CTRL_ENABLE_ID_FILTER_BIT;
bus_control |=
((mon->bus_filter & 0x1f)
<< BW_CTRL_BUS_FILTER_SHIFT);
}
// Start capture
set_bus_watcher_control(monitor,
BW0_CTRL + i*BW_PITCH,
bus_control);
}
}
/* start monitoring */
set_monitor_control(monitor, GEN_CTL_ENABLE_BIT);
}
mutex_unlock(&state->lock);
msleep(state->sample_time);
/* Now read the results */
mutex_lock(&state->lock);
for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
/* Anything enabled? */
if (mon->bus_enabled == 0) {
/* No, disable all monitoring for this monitor */
set_monitor_control(monitor, 0);
} else {
int i;
for (i = 0; i < num_buses[monitor]; i++) {
int bus = mon->current_bus[i];
read_bus_watcher(monitor,
BW0_CTRL + i*BW_PITCH,
(u32 *)&mon->results[bus].results);
}
}
}
mutex_unlock(&state->lock);
}
/*
 * Kernel thread body: run sampling passes back to back until the thread
 * is asked to stop. At least one pass is always executed. @data is the
 * driver's struct rpi_axiperf. Always returns 0.
 */
static int monitor_thread(void *data)
{
	struct rpi_axiperf *axiperf = data;

	do {
		monitor(axiperf);
	} while (!kthread_should_stop());

	return 0;
}
/*
 * debugfs read handler for a monitor's "data" file.
 *
 * Formats the most recent results of every enabled bus of the monitor
 * selected by fp->private_data (the monitor index) as a text table and
 * copies the requested window of it to user space.
 *
 * Return: number of bytes copied, 0 at end of file, or a negative errno
 * (-ENOMEM if the temporary buffer cannot be allocated, or whatever
 * simple_read_from_buffer() reports).
 */
static ssize_t myreader(struct file *fp, char __user *user_buffer,
			size_t count, loff_t *position)
{
#define INIT_BUFF_SIZE 2048
#define DIVIDER (1024)
	int idx = (int)(uintptr_t)(fp->private_data);
	int num_buses, i;
	int len = 0;
	ssize_t ret;
	char *string_buffer;
	typeof(state->monitor[0]) *mon;

	/* Validate the index before it is used to select a monitor */
	if (idx < 0 || idx >= NUM_MONITORS)
		idx = 0;
	mon = &(state->monitor[idx]);

	num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses;

	string_buffer = kmalloc(INIT_BUFF_SIZE, GFP_KERNEL);
	if (!string_buffer) {
		dev_err(&state->dev->dev,
			"Failed temporary string allocation\n");
		return -ENOMEM;
	}

	/*
	 * scnprintf() returns the number of characters actually stored and
	 * never writes past the size it is given, so 'len' always stays
	 * below INIT_BUFF_SIZE. If the table ever outgrows the buffer the
	 * output is simply truncated; there is no overflow and no early
	 * exit that could skip the unlock.
	 */
	mutex_lock(&state->lock);

	if (mon->bus_filter) {
		int filt = min(mon->bus_filter & 0x1f, num_bus_filters - 1);

		len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
				 "\nMonitoring transactions from %s only\n",
				 bus_filter_strings[filt]);
	}

	len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n"
"======================================================================================================\n");

	for (i = 0; i < num_buses; i++) {
		typeof(mon->results[0]) *res = &(mon->results[i]);

		if (!(mon->bus_enabled & (1 << i)))
			continue;

		/* Counters are reported in units of DIVIDER */
		len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
				 "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
				 idx == SYSTEM_MONITOR ?
					system_bus_string[i] :
					vpu_bus_string[i],
				 res->atrans/DIVIDER,
				 res->atwait/DIVIDER,
				 res->amax/DIVIDER,
				 res->wtrans/DIVIDER,
				 res->wtwait/DIVIDER,
				 res->wmax/DIVIDER,
				 res->rtrans/DIVIDER,
				 res->rtwait/DIVIDER,
				 res->rmax/DIVIDER);
	}

	mutex_unlock(&state->lock);

	ret = simple_read_from_buffer(user_buffer, count, position,
				      string_buffer, len);
	kfree(string_buffer);
	return ret;
}
/*
 * debugfs write handler for a monitor's "data" file.
 *
 * Currently a no-op that reports all @count bytes as consumed. The
 * monitor index stored in fp->private_data is range-checked so that it
 * is safe to use once this handler starts doing something.
 */
static ssize_t mywriter(struct file *fp, const char __user *user_buffer,
			size_t count, loff_t *position)
{
	int idx = (int)(uintptr_t)(fp->private_data);

	/* Valid indices are 0 .. NUM_MONITORS - 1 */
	if (idx < 0 || idx >= NUM_MONITORS)
		idx = 0;

	/* At the moment, this does nothing, but in the future it could be
	 * used to reset counters etc
	 */
	return count;
}
/*
 * File operations of the per-monitor debugfs "data" file. simple_open
 * is the stock open helper; myreader/mywriter read the monitor index
 * from fp->private_data.
 */
static const struct file_operations fops_debug = {
	.open	= simple_open,
	.read	= myreader,
	.write	= mywriter,
};
static int rpi_axiperf_probe(struct platform_device *pdev)
{
int ret = 0, i;
struct device *dev = &pdev->dev;
struct device_node *np = dev->of_node;
struct device_node *fw_node;
state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL);
if (!state)
return -ENOMEM;
/* Get the firmware handle for future rpi-firmware-xxx calls */
fw_node = of_parse_phandle(np, "firmware", 0);
if (!fw_node) {
dev_err(dev, "Missing firmware node\n");
return -ENOENT;
}
state->firmware = rpi_firmware_get(fw_node);
if (!state->firmware)
return -EPROBE_DEFER;
/* Special case for the VPU monitor, we must use the mailbox interface
* as it is not accessible from the ARM address space.
*/
state->monitor[VPU_MONITOR].use_mailbox_interface = 1;
state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0;
for (i = 0; i < NUM_MONITORS; i++) {
if (state->monitor[i].use_mailbox_interface) {
of_property_read_u32_index(np, "reg", i*2,
(u32 *)(&state->monitor[i].base_address));
} else {
struct resource *resource =
platform_get_resource(pdev, IORESOURCE_MEM, i);
state->monitor[i].base_address =
devm_ioremap_resource(&pdev->dev, resource);
}
if (IS_ERR(state->monitor[i].base_address))
return PTR_ERR(state->monitor[i].base_address);
/* Enable all buses by default */
state->monitor[i].bus_enabled = 0xffff;
}
state->dev = pdev;
platform_set_drvdata(pdev, state);
state->sample_time = DEFAULT_SAMPLE_TIME;
/* Set up all the debugfs stuff */
state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL);
for (i = 0; i < NUM_MONITORS; i++) {
state->monitor[i].debugfs_entry =
debugfs_create_dir(monitor_name[i], state->root_folder);
if (IS_ERR(state->monitor[i].debugfs_entry))
state->monitor[i].debugfs_entry = NULL;
debugfs_create_file("data", 0444,
state->monitor[i].debugfs_entry,
(void *)(uintptr_t)i, &fops_debug);
debugfs_create_u32("enable", 0644,
state->monitor[i].debugfs_entry,
&state->monitor[i].bus_enabled);
debugfs_create_u32("filter", 0644,
state->monitor[i].debugfs_entry,
&state->monitor[i].bus_filter);
debugfs_create_u32("sample_time", 0644,
state->monitor[i].debugfs_entry,
&state->sample_time);
}
mutex_init(&state->lock);
state->monitor_thread = kthread_run(monitor_thread, state,
"rpi-axiperfmon");
return ret;
}
/*
 * Remove: stop the sampling thread, tear down the debugfs hierarchy and
 * release the driver state allocated in probe.
 *
 * Always returns 0.
 */
static int rpi_axiperf_remove(struct platform_device *dev)
{
	if (!state)
		return 0;

	/* Only stop a thread that was actually started */
	if (!IS_ERR_OR_NULL(state->monitor_thread))
		kthread_stop(state->monitor_thread);
	state->monitor_thread = NULL;

	debugfs_remove_recursive(state->root_folder);
	state->root_folder = NULL;

	/* The thread and the debugfs files are gone, so nothing else can
	 * reach the state any more and it is safe to free.
	 */
	mutex_destroy(&state->lock);
	kfree(state);
	state = NULL;

	return 0;
}
/* Device tree compatible strings this driver binds to */
static const struct of_device_id rpi_axiperf_match[] = {
	{ .compatible = "brcm,bcm2835-axiperf" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
/* Platform driver description and module init/exit registration */
static struct platform_driver rpi_axiperf_driver = {
	.driver = {
		.name = "rpi-bcm2835-axiperf",
		.of_match_table = of_match_ptr(rpi_axiperf_match),
	},
	.probe = rpi_axiperf_probe,
	.remove = rpi_axiperf_remove,
};
module_platform_driver(rpi_axiperf_driver);
/* Module metadata: author, description and licence */
MODULE_AUTHOR("James Hughes <[email protected]>");
MODULE_DESCRIPTION("RPI AXI Performance monitor driver");
MODULE_LICENSE("GPL");