// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
static struct cpumask scale_freq_counters_mask;
static bool scale_freq_invariant;
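
/*
 * Return true only if every CPU in @cpus has a counter-based frequency
 * scale source registered, i.e. is covered by scale_freq_counters_mask.
 */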
static bool supports_scale_freq_counters(const struct cpumask *cpus)
{
        return cpumask_subset(cpus, &scale_freq_counters_mask);
}

bool topology_scale_freq_invariant(void)
{
        return cpufreq_supports_freq_invariance() ||
               supports_scale_freq_counters(cpu_online_mask);
}

static void update_scale_freq_invariant(bool status)
{
        if (scale_freq_invariant == status)
                return;

        /*
         * Task scheduler behavior depends on frequency invariance support,
         * either cpufreq or counter driven. If the support status changes as
         * a result of counter initialisation and use, retrigger the build of
         * scheduling domains to ensure the information is propagated properly.
         */
        if (topology_scale_freq_invariant() == status) {
                scale_freq_invariant = status;
                rebuild_sched_domains_energy();
        }
}

void topology_set_scale_freq_source(struct scale_freq_data *data,
                                    const struct cpumask *cpus)
{
        struct scale_freq_data *sfd;
        int cpu;

        /*
         * Avoid calling rebuild_sched_domains() unnecessarily if FIE is
         * supported by cpufreq.
         */
        if (cpumask_empty(&scale_freq_counters_mask))
                scale_freq_invariant = topology_scale_freq_invariant();

        for_each_cpu(cpu, cpus) {
                sfd = per_cpu(sft_data, cpu);

                /* Use ARCH provided counters whenever possible */
                if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
                        per_cpu(sft_data, cpu) = data;
                        cpumask_set_cpu(cpu, &scale_freq_counters_mask);
                }
        }

        update_scale_freq_invariant(true);
}
EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);

void topology_clear_scale_freq_source(enum scale_freq_source source,
                                      const struct cpumask *cpus)
{
        struct scale_freq_data *sfd;
        int cpu;

        for_each_cpu(cpu, cpus) {
                sfd = per_cpu(sft_data, cpu);

                if (sfd && sfd->source == source) {
                        per_cpu(sft_data, cpu) = NULL;
                        cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
                }
        }

        update_scale_freq_invariant(false);
}
EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
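
/*
 * Called on the scheduler tick path: let the counter-based source
 * registered for this CPU (if any) refresh its frequency scale factor.
 */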
void topology_scale_freq_tick(void)
{
        struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);

        if (sfd)
                sfd->set_freq_scale();
}

DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
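
/*
 * cpufreq-driven frequency invariance: set arch_freq_scale for @cpus to
 * cur_freq / max_freq, expressed in SCHED_CAPACITY_SCALE units. Skipped
 * when counter-based FIE is active for these CPUs.
 */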
void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
                             unsigned long max_freq)
{
        unsigned long scale;
        int i;

        if (WARN_ON_ONCE(!cur_freq || !max_freq))
                return;

        /*
         * If the use of counters for FIE is enabled, just return as we don't
         * want to update the scale factor with information from CPUFREQ.
         * Instead the scale factor will be updated from arch_scale_freq_tick.
         */
        if (supports_scale_freq_counters(cpus))
                return;

        scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

        for_each_cpu(i, cpus)
                per_cpu(arch_freq_scale, i) = scale;
}

DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
        per_cpu(cpu_scale, cpu) = capacity;
}

DEFINE_PER_CPU(unsigned long, thermal_pressure);
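
/*
 * Record the capacity lost to thermal capping for each CPU in @cpus.
 * The value is read locklessly elsewhere, hence the WRITE_ONCE().
 */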
void topology_set_thermal_pressure(const struct cpumask *cpus,
                                   unsigned long th_pressure)
{
        int cpu;

        for_each_cpu(cpu, cpus)
                WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
}

static ssize_t cpu_capacity_show(struct device *dev,
                                 struct device_attribute *attr,
                                 char *buf)
{
        struct cpu *cpu = container_of(dev, struct cpu, dev);

        return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

static int register_cpu_capacity_sysctl(void)
{
        int i;
        struct device *cpu;

        for_each_possible_cpu(i) {
                cpu = get_cpu_device(i);
                if (!cpu) {
                        pr_err("%s: too early to get CPU%d device!\n",
                               __func__, i);
                        continue;
                }
                device_create_file(cpu, &dev_attr_cpu_capacity);
        }

        return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

static int update_topology;

int topology_update_cpu_topology(void)
{
        return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
        update_topology = 1;
        rebuild_sched_domains();
        pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
        update_topology = 0;
}

static DEFINE_PER_CPU(u32, freq_factor) = 1;
static u32 *raw_capacity;

static int free_raw_capacity(void)
{
        kfree(raw_capacity);
        raw_capacity = NULL;

        return 0;
}
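
/*
 * Normalize the raw capacity-dmips-mhz values: weight each CPU's raw
 * capacity by its frequency (freq_factor), then rescale so the largest
 * CPU ends up at SCHED_CAPACITY_SCALE (1024).
 */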
void topology_normalize_cpu_scale(void)
{
        u64 capacity;
        u64 capacity_scale;
        int cpu;

        if (!raw_capacity)
                return;

        capacity_scale = 1;
        for_each_possible_cpu(cpu) {
                capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
                capacity_scale = max(capacity, capacity_scale);
        }

        pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
        for_each_possible_cpu(cpu) {
                capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
                capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
                                     capacity_scale);
                topology_set_cpu_scale(cpu, capacity);
                pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
                         cpu, topology_get_cpu_scale(cpu));
        }
}
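
/*
 * Read the "capacity-dmips-mhz" property of @cpu_node into raw_capacity[].
 * Returns true if a value was parsed. If any CPU node lacks the property,
 * the partial data is discarded and parsing is disabled for good.
 */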
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
        struct clk *cpu_clk;
        static bool cap_parsing_failed;
        int ret;
        u32 cpu_capacity;

        if (cap_parsing_failed)
                return false;

        ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
                                   &cpu_capacity);
        if (!ret) {
                if (!raw_capacity) {
                        raw_capacity = kcalloc(num_possible_cpus(),
                                               sizeof(*raw_capacity),
                                               GFP_KERNEL);
                        if (!raw_capacity) {
                                cap_parsing_failed = true;
                                return false;
                        }
                }
                raw_capacity[cpu] = cpu_capacity;
                pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
                         cpu_node, raw_capacity[cpu]);

                /*
                 * Update freq_factor for calculating early boot CPU
                 * capacities. For non-clk CPU DVFS mechanisms there is no
                 * way to get the frequency value now, so assume the CPUs
                 * are running at the same frequency (by keeping the
                 * initial freq_factor value).
                 */
                cpu_clk = of_clk_get(cpu_node, 0);
                if (!PTR_ERR_OR_ZERO(cpu_clk)) {
                        per_cpu(freq_factor, cpu) =
                                clk_get_rate(cpu_clk) / 1000;
                        clk_put(cpu_clk);
                }
        } else {
                if (raw_capacity) {
                        pr_err("cpu_capacity: missing %pOF raw capacity\n",
                               cpu_node);
                        pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
                }
                cap_parsing_failed = true;
                free_raw_capacity();
        }

        return !ret;
}

#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
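
/*
 * cpufreq policy notifier: on every CPUFREQ_CREATE_POLICY event, record
 * each CPU's maximum frequency as its freq_factor and drop the policy's
 * CPUs from cpus_to_visit. Once all possible CPUs have been visited,
 * normalize the capacities and schedule the teardown work.
 */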
static int
init_cpu_capacity_callback(struct notifier_block *nb,
                           unsigned long val,
                           void *data)
{
        struct cpufreq_policy *policy = data;
        int cpu;

        if (!raw_capacity)
                return 0;

        if (val != CPUFREQ_CREATE_POLICY)
                return 0;

        pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
                 cpumask_pr_args(policy->related_cpus),
                 cpumask_pr_args(cpus_to_visit));

        cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

        for_each_cpu(cpu, policy->related_cpus)
                per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;

        if (cpumask_empty(cpus_to_visit)) {
                topology_normalize_cpu_scale();
                schedule_work(&update_topology_flags_work);
                free_raw_capacity();
                pr_debug("cpu_capacity: parsing done\n");
                schedule_work(&parsing_done_work);
        }

        return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
        .notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
        int ret;

        /*
         * On ACPI-based systems, skip registering the cpufreq notifier:
         * we need to use the default CPU capacity until we have the
         * necessary code to parse the CPU capacity from ACPI.
         */
        if (!acpi_disabled || !raw_capacity)
                return -EINVAL;

        if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
                return -ENOMEM;

        cpumask_copy(cpus_to_visit, cpu_possible_mask);

        ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
                                        CPUFREQ_POLICY_NOTIFIER);

        if (ret)
                free_cpumask_var(cpus_to_visit);

        return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
        cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
                                    CPUFREQ_POLICY_NOTIFIER);
        free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical CPU number of the given node.
 * There are basically three kinds of return values:
 * (1) The logical CPU number, which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT
 *     but there is no possible logical CPU in the kernel to match. This
 *     happens when CONFIG_NR_CPUS is configured to be smaller than the
 *     number of CPU nodes in the DT. We need to just ignore this case.
 * (3) -1 if the node does not exist in the device tree.
 */
static int __init get_cpu_for_node(struct device_node *node)
{
        struct device_node *cpu_node;
        int cpu;

        cpu_node = of_parse_phandle(node, "cpu", 0);
        if (!cpu_node)
                return -1;

        cpu = of_cpu_node_to_id(cpu_node);
        if (cpu >= 0)
                topology_parse_cpu_capacity(cpu_node, cpu);
        else
                pr_info("CPU node for %pOF exists but is outside the possible cpu range: %*pbl\n",
                        cpu_node, cpumask_pr_args(cpu_possible_mask));

        of_node_put(cpu_node);
        return cpu;
}
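
/*
 * Parse a cpu-map "coreN" node: either a leaf core with a "cpu" phandle,
 * or a container of "threadN" nodes for SMT. Fills in the package, core
 * and thread ids in cpu_topology[].
 */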
static int __init parse_core(struct device_node *core, int package_id,
                             int core_id)
{
        char name[20];
        bool leaf = true;
        int i = 0;
        int cpu;
        struct device_node *t;

        do {
                snprintf(name, sizeof(name), "thread%d", i);
                t = of_get_child_by_name(core, name);
                if (t) {
                        leaf = false;
                        cpu = get_cpu_for_node(t);
                        if (cpu >= 0) {
                                cpu_topology[cpu].package_id = package_id;
                                cpu_topology[cpu].core_id = core_id;
                                cpu_topology[cpu].thread_id = i;
                        } else if (cpu != -ENODEV) {
                                pr_err("%pOF: Can't get CPU for thread\n", t);
                                of_node_put(t);
                                return -EINVAL;
                        }
                        of_node_put(t);
                }
                i++;
        } while (t);

        cpu = get_cpu_for_node(core);
        if (cpu >= 0) {
                if (!leaf) {
                        pr_err("%pOF: Core has both threads and CPU\n",
                               core);
                        return -EINVAL;
                }

                cpu_topology[cpu].package_id = package_id;
                cpu_topology[cpu].core_id = core_id;
        } else if (leaf && cpu != -ENODEV) {
                pr_err("%pOF: Can't get CPU for leaf core\n", core);
                return -EINVAL;
        }

        return 0;
}
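
/*
 * Parse a cpu-map "clusterN" node, recursing into nested clusters and
 * handing the "coreN" children of leaf clusters to parse_core(). Each
 * leaf cluster is assigned a new package_id.
 */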
static int __init parse_cluster(struct device_node *cluster, int depth)
{
        char name[20];
        bool leaf = true;
        bool has_cores = false;
        struct device_node *c;
        static int package_id __initdata;
        int core_id = 0;
        int i, ret;

        /*
         * First check for child clusters; we currently ignore any
         * information about the nesting of clusters and present the
         * scheduler with a flat list of them.
         */
        i = 0;
        do {
                snprintf(name, sizeof(name), "cluster%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
                        leaf = false;
                        ret = parse_cluster(c, depth + 1);
                        of_node_put(c);
                        if (ret != 0)
                                return ret;
                }
                i++;
        } while (c);

        /* Now check for cores */
        i = 0;
        do {
                snprintf(name, sizeof(name), "core%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
                        has_cores = true;

                        if (depth == 0) {
                                pr_err("%pOF: cpu-map children should be clusters\n",
                                       c);
                                of_node_put(c);
                                return -EINVAL;
                        }

                        if (leaf) {
                                ret = parse_core(c, package_id, core_id++);
                        } else {
                                pr_err("%pOF: Non-leaf cluster with core %s\n",
                                       cluster, name);
                                ret = -EINVAL;
                        }

                        of_node_put(c);
                        if (ret != 0)
                                return ret;
                }
                i++;
        } while (c);

        if (leaf && !has_cores)
                pr_warn("%pOF: empty cluster\n", cluster);

        if (leaf)
                package_id++;

        return 0;
}
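
/*
 * Walk the /cpus/cpu-map device tree node, if present, and populate
 * cpu_topology[]. Fails if any possible CPU was left out of the map.
 */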
static int __init parse_dt_topology(void)
{
        struct device_node *cn, *map;
        int ret = 0;
        int cpu;

        cn = of_find_node_by_path("/cpus");
        if (!cn) {
                pr_err("No CPU information found in DT\n");
                return 0;
        }

        /*
         * When topology is provided cpu-map is essentially a root
         * cluster with restricted subnodes.
         */
        map = of_get_child_by_name(cn, "cpu-map");
        if (!map)
                goto out;

        ret = parse_cluster(map, 0);
        if (ret != 0)
                goto out_map;

        topology_normalize_cpu_scale();

        /*
         * Check that all cores are in the topology; the SMP code will
         * only mark cores described in the DT as possible.
         */
        for_each_possible_cpu(cpu)
                if (cpu_topology[cpu].package_id == -1)
                        ret = -EINVAL;

out_map:
        of_node_put(map);
out:
        of_node_put(cn);
        return ret;
}
#endif

/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

const struct cpumask *cpu_coregroup_mask(int cpu)
{
        const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

        /* Find the smaller of NUMA, core or LLC siblings */
        if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
                /* not numa in package, lets use the package siblings */
                core_mask = &cpu_topology[cpu].core_sibling;
        }
        if (cpu_topology[cpu].llc_id != -1) {
                if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
                        core_mask = &cpu_topology[cpu].llc_sibling;
        }

        return core_mask;
}
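
/*
 * Add @cpuid to the LLC, core and thread sibling masks of every online
 * CPU it shares an LLC, package or core with, and vice versa.
 */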
void update_siblings_masks(unsigned int cpuid)
{
        struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
        int cpu;

        /* update core and thread sibling masks */
        for_each_online_cpu(cpu) {
                cpu_topo = &cpu_topology[cpu];

                if (cpuid_topo->llc_id == cpu_topo->llc_id) {
                        cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
                        cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
                }

                if (cpuid_topo->package_id != cpu_topo->package_id)
                        continue;

                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

                if (cpuid_topo->core_id != cpu_topo->core_id)
                        continue;

                cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
        }
}
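
/*
 * Reset a CPU's sibling masks so that it is only a sibling of itself.
 */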
static void clear_cpu_topology(int cpu)
{
        struct cpu_topology *cpu_topo = &cpu_topology[cpu];

        cpumask_clear(&cpu_topo->llc_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

        cpumask_clear(&cpu_topo->core_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
        cpumask_clear(&cpu_topo->thread_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

void __init reset_cpu_topology(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu) {
                struct cpu_topology *cpu_topo = &cpu_topology[cpu];

                cpu_topo->thread_id = -1;
                cpu_topo->core_id = -1;
                cpu_topo->package_id = -1;
                cpu_topo->llc_id = -1;

                clear_cpu_topology(cpu);
        }
}
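
/*
 * CPU hot-unplug: drop @cpu from every sibling mask it appears in, then
 * reset its own masks.
 */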
void remove_cpu_topology(unsigned int cpu)
{
        int sibling;

        for_each_cpu(sibling, topology_core_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
        for_each_cpu(sibling, topology_sibling_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
        for_each_cpu(sibling, topology_llc_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

        clear_cpu_topology(cpu);
}

__weak int __init parse_acpi_topology(void)
{
        return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
        reset_cpu_topology();

        /*
         * Discard anything that was parsed if we hit an error so we
         * don't use partial information.
         */
        if (parse_acpi_topology())
                reset_cpu_topology();
        else if (of_have_populated_dt() && parse_dt_topology())
                reset_cpu_topology();
}
#endif