mirror of https://github.com/Qortal/Brooklyn
// SPDX-License-Identifier: GPL-2.0
#include <linux/slab.h>
#include <linux/lockdep.h>
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/memory.h>
#include <linux/memory-tiers.h>

#include "internal.h"

struct memory_tier {
	/* hierarchy of memory tiers */
	struct list_head list;
	/* list of all memory types part of this tier */
	struct list_head memory_types;
	/*
	 * start value of abstract distance. memory tier maps
	 * an abstract distance range,
	 * adistance_start .. adistance_start + MEMTIER_CHUNK_SIZE
	 */
	int adistance_start;
	struct device dev;
	/* All the nodes that are part of all the lower memory tiers. */
	nodemask_t lower_tier_mask;
};

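/*
 * Worked example (an illustrative sketch, not taken from this file):
 * assuming MEMTIER_CHUNK_BITS is 10 and MEMTIER_ADISTANCE_DRAM lands in
 * the fifth chunk, as in the mainline memory-tiers.h this file builds
 * against, the default DRAM type maps to:
 *
 *	adistance_start = round_down(MEMTIER_ADISTANCE_DRAM, MEMTIER_CHUNK_SIZE);
 *	dev.id          = adistance_start >> MEMTIER_CHUNK_BITS;	/* 4 */
 *
 * i.e. the DRAM tier shows up as memory_tier4 in sysfs, leaving chunks
 * 0-3 free for faster (lower abstract distance) memory types.
 */
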
struct demotion_nodes {
	nodemask_t preferred;
};

struct node_memory_type_map {
	struct memory_dev_type *memtype;
	int map_count;
};

static DEFINE_MUTEX(memory_tier_lock);
static LIST_HEAD(memory_tiers);
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
static struct memory_dev_type *default_dram_type;

static struct bus_type memory_tier_subsys = {
	.name = "memory_tiering",
	.dev_name = "memory_tier",
};

#ifdef CONFIG_MIGRATION
static int top_tier_adistance;
/*
 * node_demotion[] examples:
 *
 * Example 1:
 *
 * Nodes 0 & 1 are CPU + DRAM nodes, nodes 2 & 3 are PMEM nodes.
 *
 * node distances:
 * node   0    1    2    3
 *    0  10   20   30   40
 *    1  20   10   40   30
 *    2  30   40   10   40
 *    3  40   30   40   10
 *
 * memory_tiers0 = 0-1
 * memory_tiers1 = 2-3
 *
 * node_demotion[0].preferred = 2
 * node_demotion[1].preferred = 3
 * node_demotion[2].preferred = <empty>
 * node_demotion[3].preferred = <empty>
 *
 * Example 2:
 *
 * Nodes 0 & 1 are CPU + DRAM nodes, node 2 is a memory-only DRAM node.
 *
 * node distances:
 * node   0    1    2
 *    0  10   20   30
 *    1  20   10   30
 *    2  30   30   10
 *
 * memory_tiers0 = 0-2
 *
 * node_demotion[0].preferred = <empty>
 * node_demotion[1].preferred = <empty>
 * node_demotion[2].preferred = <empty>
 *
 * Example 3:
 *
 * Node 0 is a CPU + DRAM node, node 1 is an HBM node, node 2 is a PMEM node.
 *
 * node distances:
 * node   0    1    2
 *    0  10   20   30
 *    1  20   10   40
 *    2  30   40   10
 *
 * memory_tiers0 = 1
 * memory_tiers1 = 0
 * memory_tiers2 = 2
 *
 * node_demotion[0].preferred = 2
 * node_demotion[1].preferred = 0
 * node_demotion[2].preferred = <empty>
 */
static struct demotion_nodes *node_demotion __read_mostly;
#endif /* CONFIG_MIGRATION */

static inline struct memory_tier *to_memory_tier(struct device *device)
{
	return container_of(device, struct memory_tier, dev);
}

static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier)
{
	nodemask_t nodes = NODE_MASK_NONE;
	struct memory_dev_type *memtype;

	list_for_each_entry(memtype, &memtier->memory_types, tier_sibiling)
		nodes_or(nodes, nodes, memtype->nodes);

	return nodes;
}

static void memory_tier_device_release(struct device *dev)
{
	struct memory_tier *tier = to_memory_tier(dev);

	/*
	 * synchronize_rcu in clear_node_memory_tier makes sure
	 * we don't have rcu access to this memory tier.
	 */
	kfree(tier);
}

static ssize_t nodelist_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	int ret;
	nodemask_t nmask;

	mutex_lock(&memory_tier_lock);
	nmask = get_memtier_nodemask(to_memory_tier(dev));
	ret = sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&nmask));
	mutex_unlock(&memory_tier_lock);
	return ret;
}
static DEVICE_ATTR_RO(nodelist);

static struct attribute *memtier_dev_attrs[] = {
	&dev_attr_nodelist.attr,
	NULL
};

static const struct attribute_group memtier_dev_group = {
	.attrs = memtier_dev_attrs,
};

static const struct attribute_group *memtier_dev_groups[] = {
	&memtier_dev_group,
	NULL
};
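
/*
 * With the bus type and attribute group above, each tier becomes visible
 * under sysfs roughly as (path shown as an illustration, assuming the
 * default virtual-device layout and a DRAM tier with device id 4):
 *
 *	/sys/devices/virtual/memory_tiering/memory_tier4/nodelist
 *
 * Reading the nodelist file returns the nodes currently attached to that
 * tier, formatted by nodelist_show() above.
 */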

static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype)
{
	int ret;
	bool found_slot = false;
	struct memory_tier *memtier, *new_memtier;
	int adistance = memtype->adistance;
	unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE;

	lockdep_assert_held_once(&memory_tier_lock);

	adistance = round_down(adistance, memtier_adistance_chunk_size);
	/*
	 * If the memtype is already part of a memory tier,
	 * just return that.
	 */
	if (!list_empty(&memtype->tier_sibiling)) {
		list_for_each_entry(memtier, &memory_tiers, list) {
			if (adistance == memtier->adistance_start)
				return memtier;
		}
		WARN_ON(1);
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(memtier, &memory_tiers, list) {
		if (adistance == memtier->adistance_start) {
			goto link_memtype;
		} else if (adistance < memtier->adistance_start) {
			found_slot = true;
			break;
		}
	}

	new_memtier = kzalloc(sizeof(struct memory_tier), GFP_KERNEL);
	if (!new_memtier)
		return ERR_PTR(-ENOMEM);

	new_memtier->adistance_start = adistance;
	INIT_LIST_HEAD(&new_memtier->list);
	INIT_LIST_HEAD(&new_memtier->memory_types);
	/* Keep the tier list sorted by adistance_start. */
	if (found_slot)
		list_add_tail(&new_memtier->list, &memtier->list);
	else
		list_add_tail(&new_memtier->list, &memory_tiers);

	new_memtier->dev.id = adistance >> MEMTIER_CHUNK_BITS;
	new_memtier->dev.bus = &memory_tier_subsys;
	new_memtier->dev.release = memory_tier_device_release;
	new_memtier->dev.groups = memtier_dev_groups;

	ret = device_register(&new_memtier->dev);
	if (ret) {
		/* device_register() failed: unlink the new tier and release it. */
		list_del(&new_memtier->list);
		put_device(&new_memtier->dev);
		return ERR_PTR(ret);
	}
	memtier = new_memtier;

link_memtype:
	list_add(&memtype->tier_sibiling, &memtier->memory_types);
	return memtier;
}

static struct memory_tier *__node_get_memory_tier(int node)
{
	pg_data_t *pgdat;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return NULL;
	/*
	 * Since we hold memory_tier_lock, we can avoid
	 * RCU read locks when accessing the details. No
	 * parallel updates are possible here.
	 */
	return rcu_dereference_check(pgdat->memtier,
				     lockdep_is_held(&memory_tier_lock));
}

#ifdef CONFIG_MIGRATION
bool node_is_toptier(int node)
{
	bool toptier;
	pg_data_t *pgdat;
	struct memory_tier *memtier;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return false;

	rcu_read_lock();
	memtier = rcu_dereference(pgdat->memtier);
	if (!memtier) {
		toptier = true;
		goto out;
	}
	if (memtier->adistance_start <= top_tier_adistance)
		toptier = true;
	else
		toptier = false;
out:
	rcu_read_unlock();
	return toptier;
}
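
/*
 * Illustrative sketch (not part of this file): a NUMA-hinting style
 * promotion path might use node_is_toptier() to skip folios that already
 * live in the top tier. should_try_promote() is a hypothetical helper.
 */
static inline bool should_try_promote(struct folio *folio)
{
	/* Only folios below the top tier are promotion candidates. */
	return !node_is_toptier(folio_nid(folio));
}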

void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
{
	struct memory_tier *memtier;

	/*
	 * pg_data_t.memtier updates include a synchronize_rcu()
	 * which ensures that we either find NULL or a valid memtier
	 * in NODE_DATA. Protect the access via rcu_read_lock().
	 */
	rcu_read_lock();
	memtier = rcu_dereference(pgdat->memtier);
	if (memtier)
		*targets = memtier->lower_tier_mask;
	else
		*targets = NODE_MASK_NONE;
	rcu_read_unlock();
}

/**
 * next_demotion_node() - Get the next node in the demotion path
 * @node: The starting node to lookup the next node
 *
 * Return: node id for next memory node in the demotion path hierarchy
 * from @node; NUMA_NO_NODE if @node is terminal. This does not keep
 * @node online or guarantee that it *continues* to be the next demotion
 * target.
 */
int next_demotion_node(int node)
{
	struct demotion_nodes *nd;
	int target;

	if (!node_demotion)
		return NUMA_NO_NODE;

	nd = &node_demotion[node];

	/*
	 * node_demotion[] is updated without excluding this
	 * function from running.
	 *
	 * Make sure to use RCU over entire code blocks if
	 * node_demotion[] reads need to be consistent.
	 */
	rcu_read_lock();
	/*
	 * If there are multiple target nodes, just select one
	 * target node randomly.
	 *
	 * Round-robin selection would need an extra field in
	 * node_demotion[] to remember the last selected target,
	 * which could cause cache ping-pong as it changes; per-cpu
	 * state would avoid that but adds complexity. Random
	 * selection keeps things simple for now.
	 */
	target = node_random(&nd->preferred);
	rcu_read_unlock();

	return target;
}
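
/*
 * Illustrative sketch (not part of this file): a reclaim/demotion path
 * might consult next_demotion_node() like this when picking where to
 * move a cold folio. demote_candidate_node() is a hypothetical helper.
 */
static inline int demote_candidate_node(struct folio *folio)
{
	/* Demotion can be disabled globally via sysfs. */
	if (!numa_demotion_enabled)
		return NUMA_NO_NODE;

	/* One of the preferred lower-tier targets for this folio's node. */
	return next_demotion_node(folio_nid(folio));
}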

static void disable_all_demotion_targets(void)
{
	struct memory_tier *memtier;
	int node;

	for_each_node_state(node, N_MEMORY) {
		node_demotion[node].preferred = NODE_MASK_NONE;
		/*
		 * We are holding memory_tier_lock, it is safe
		 * to access pgdat->memtier.
		 */
		memtier = __node_get_memory_tier(node);
		if (memtier)
			memtier->lower_tier_mask = NODE_MASK_NONE;
	}
	/*
	 * Ensure that the "disable" is visible across the system.
	 * Readers will see either a combination of before+disable
	 * state or disable+after. They will never see before and
	 * after state together.
	 */
	synchronize_rcu();
}

/*
 * Find an automatic demotion target for all memory
 * nodes. Failing here is OK. It might just indicate
 * being at the end of a chain.
 */
static void establish_demotion_targets(void)
{
	struct memory_tier *memtier;
	struct demotion_nodes *nd;
	int target = NUMA_NO_NODE, node;
	int distance, best_distance;
	nodemask_t tier_nodes, lower_tier;

	lockdep_assert_held_once(&memory_tier_lock);

	if (!node_demotion || !IS_ENABLED(CONFIG_MIGRATION))
		return;

	disable_all_demotion_targets();

	for_each_node_state(node, N_MEMORY) {
		best_distance = -1;
		nd = &node_demotion[node];

		memtier = __node_get_memory_tier(node);
		if (!memtier || list_is_last(&memtier->list, &memory_tiers))
			continue;
		/*
		 * Get the lower memtier to find the demotion node list.
		 */
		memtier = list_next_entry(memtier, list);
		tier_nodes = get_memtier_nodemask(memtier);
		/*
		 * find_next_best_node() uses the 'used' nodemask as a skip
		 * list. Add all memory nodes except the selected memory
		 * tier's nodelist to the skip list so that we find the best
		 * node within the memtier nodelist.
		 */
		nodes_andnot(tier_nodes, node_states[N_MEMORY], tier_nodes);

		/*
		 * Find all the nodes in the memory tier node list that share
		 * the same best distance and add them to the preferred mask.
		 * We randomly select between nodes in the preferred mask when
		 * allocating pages during demotion.
		 */
		do {
			target = find_next_best_node(node, &tier_nodes);
			if (target == NUMA_NO_NODE)
				break;

			distance = node_distance(node, target);
			if (distance == best_distance || best_distance == -1) {
				best_distance = distance;
				node_set(target, nd->preferred);
			} else {
				break;
			}
		} while (1);
	}
	/*
	 * Promotion is allowed from a memory tier to a higher memory tier
	 * only if the lower tier doesn't include compute. We want to skip
	 * promotion from a memory tier if any node that is part of that
	 * tier has CPUs. Once we detect such a memory tier, we consider it
	 * the top tier from which promotion is not allowed.
	 */
	list_for_each_entry_reverse(memtier, &memory_tiers, list) {
		tier_nodes = get_memtier_nodemask(memtier);
		nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
		if (!nodes_empty(tier_nodes)) {
			/*
			 * An abstract distance at or below the max value of
			 * this memtier is considered toptier.
			 */
			top_tier_adistance = memtier->adistance_start +
						MEMTIER_CHUNK_SIZE - 1;
			break;
		}
	}
	/*
	 * Now build the lower_tier mask for each memory tier, collecting the
	 * node mask from all memory tiers below it. This allows us to fall
	 * back demotion page allocation to a set of nodes that is closer to
	 * the preferred node selected above.
	 */
	lower_tier = node_states[N_MEMORY];
	list_for_each_entry(memtier, &memory_tiers, list) {
		/*
		 * Keep removing the current tier from the lower_tier nodes;
		 * this removes all nodes in the current and higher memory
		 * tiers from the lower_tier mask.
		 */
		tier_nodes = get_memtier_nodemask(memtier);
		nodes_andnot(lower_tier, lower_tier, tier_nodes);
		memtier->lower_tier_mask = lower_tier;
	}
}

#else
static inline void disable_all_demotion_targets(void) {}
static inline void establish_demotion_targets(void) {}
#endif /* CONFIG_MIGRATION */

static inline void __init_node_memory_type(int node, struct memory_dev_type *memtype)
{
	if (!node_memory_types[node].memtype)
		node_memory_types[node].memtype = memtype;
	/*
	 * For each device getting added in the same NUMA node
	 * with this specific memtype, bump the map count. We
	 * only take a memtype device reference once, so that
	 * changing a node's memtype can be done by dropping the
	 * only reference count taken here.
	 */

	if (node_memory_types[node].memtype == memtype) {
		if (!node_memory_types[node].map_count++)
			kref_get(&memtype->kref);
	}
}

static struct memory_tier *set_node_memory_tier(int node)
{
	struct memory_tier *memtier;
	struct memory_dev_type *memtype;
	pg_data_t *pgdat = NODE_DATA(node);

	lockdep_assert_held_once(&memory_tier_lock);

	if (!node_state(node, N_MEMORY))
		return ERR_PTR(-EINVAL);

	__init_node_memory_type(node, default_dram_type);

	memtype = node_memory_types[node].memtype;
	node_set(node, memtype->nodes);
	memtier = find_create_memory_tier(memtype);
	if (!IS_ERR(memtier))
		rcu_assign_pointer(pgdat->memtier, memtier);
	return memtier;
}

static void destroy_memory_tier(struct memory_tier *memtier)
{
	list_del(&memtier->list);
	device_unregister(&memtier->dev);
}

static bool clear_node_memory_tier(int node)
{
	bool cleared = false;
	pg_data_t *pgdat;
	struct memory_tier *memtier;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return false;

	/*
	 * Make sure that anybody looking at NODE_DATA who finds
	 * a valid memtier finds memory_dev_types with nodes still
	 * linked to the memtier. We achieve this by waiting for
	 * the RCU read section to finish using synchronize_rcu.
	 * This also enables us to free the destroyed memory tier
	 * with kfree instead of kfree_rcu.
	 */
	memtier = __node_get_memory_tier(node);
	if (memtier) {
		struct memory_dev_type *memtype;

		rcu_assign_pointer(pgdat->memtier, NULL);
		synchronize_rcu();
		memtype = node_memory_types[node].memtype;
		node_clear(node, memtype->nodes);
		if (nodes_empty(memtype->nodes)) {
			list_del_init(&memtype->tier_sibiling);
			if (list_empty(&memtier->memory_types))
				destroy_memory_tier(memtier);
		}
		cleared = true;
	}
	return cleared;
}

static void release_memtype(struct kref *kref)
{
	struct memory_dev_type *memtype;

	memtype = container_of(kref, struct memory_dev_type, kref);
	kfree(memtype);
}

struct memory_dev_type *alloc_memory_type(int adistance)
{
	struct memory_dev_type *memtype;

	memtype = kmalloc(sizeof(*memtype), GFP_KERNEL);
	if (!memtype)
		return ERR_PTR(-ENOMEM);

	memtype->adistance = adistance;
	INIT_LIST_HEAD(&memtype->tier_sibiling);
	memtype->nodes = NODE_MASK_NONE;
	kref_init(&memtype->kref);
	return memtype;
}
EXPORT_SYMBOL_GPL(alloc_memory_type);

void destroy_memory_type(struct memory_dev_type *memtype)
{
	kref_put(&memtype->kref, release_memtype);
}
EXPORT_SYMBOL_GPL(destroy_memory_type);

void init_node_memory_type(int node, struct memory_dev_type *memtype)
{
	mutex_lock(&memory_tier_lock);
	__init_node_memory_type(node, memtype);
	mutex_unlock(&memory_tier_lock);
}
EXPORT_SYMBOL_GPL(init_node_memory_type);

void clear_node_memory_type(int node, struct memory_dev_type *memtype)
{
	mutex_lock(&memory_tier_lock);
	if (node_memory_types[node].memtype == memtype)
		node_memory_types[node].map_count--;
	/*
	 * If we unmapped all the attached devices to this node,
	 * clear the node memory type.
	 */
	if (!node_memory_types[node].map_count) {
		node_memory_types[node].memtype = NULL;
		kref_put(&memtype->kref, release_memtype);
	}
	mutex_unlock(&memory_tier_lock);
}
EXPORT_SYMBOL_GPL(clear_node_memory_type);
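
/*
 * Illustrative sketch (not part of this file) of how a memory device
 * driver, dax/kmem style, might use the exported helpers above to tag
 * its NUMA node with a slower memory type before onlining it. The
 * example_* names are hypothetical; MEMTIER_DEFAULT_DAX_ADISTANCE is
 * assumed to be provided by memory-tiers.h.
 */
static struct memory_dev_type *example_memtype;

static inline int example_register_slow_node(int nid)
{
	example_memtype = alloc_memory_type(MEMTIER_DEFAULT_DAX_ADISTANCE);
	if (IS_ERR(example_memtype))
		return PTR_ERR(example_memtype);

	/* Takes a reference on the memtype and records it for @nid. */
	init_node_memory_type(nid, example_memtype);
	return 0;
}

static inline void example_unregister_slow_node(int nid)
{
	/* Drops the per-node reference taken by init_node_memory_type(). */
	clear_node_memory_type(nid, example_memtype);
	/* Drops the initial reference from alloc_memory_type(). */
	destroy_memory_type(example_memtype);
}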

static int __meminit memtier_hotplug_callback(struct notifier_block *self,
					      unsigned long action, void *_arg)
{
	struct memory_tier *memtier;
	struct memory_notify *arg = _arg;

	/*
	 * Only update the node migration order when a node is
	 * changing status, like online->offline.
	 */
	if (arg->status_change_nid < 0)
		return notifier_from_errno(0);

	switch (action) {
	case MEM_OFFLINE:
		mutex_lock(&memory_tier_lock);
		if (clear_node_memory_tier(arg->status_change_nid))
			establish_demotion_targets();
		mutex_unlock(&memory_tier_lock);
		break;
	case MEM_ONLINE:
		mutex_lock(&memory_tier_lock);
		memtier = set_node_memory_tier(arg->status_change_nid);
		if (!IS_ERR(memtier))
			establish_demotion_targets();
		mutex_unlock(&memory_tier_lock);
		break;
	}

	return notifier_from_errno(0);
}

static int __init memory_tier_init(void)
{
	int ret, node;
	struct memory_tier *memtier;

	ret = subsys_virtual_register(&memory_tier_subsys, NULL);
	if (ret)
		panic("%s() failed to register memory tier subsystem\n", __func__);

#ifdef CONFIG_MIGRATION
	node_demotion = kcalloc(nr_node_ids, sizeof(struct demotion_nodes),
				GFP_KERNEL);
	WARN_ON(!node_demotion);
#endif
	mutex_lock(&memory_tier_lock);
	/*
	 * For now we can have 4 faster memory tiers with smaller adistance
	 * than default DRAM tier.
	 */
	default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM);
	if (IS_ERR(default_dram_type))
		panic("%s() failed to allocate default DRAM tier\n", __func__);

	/*
	 * Look at all the existing N_MEMORY nodes and add them to
	 * default memory tier or to a tier if we already have memory
	 * types assigned.
	 */
	for_each_node_state(node, N_MEMORY) {
		memtier = set_node_memory_tier(node);
		if (IS_ERR(memtier))
			/*
			 * Continue with the memtiers we were able to set up.
			 */
			break;
	}
	establish_demotion_targets();
	mutex_unlock(&memory_tier_lock);

	hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRIO);
	return 0;
}
subsys_initcall(memory_tier_init);

bool numa_demotion_enabled = false;

#ifdef CONFIG_MIGRATION
#ifdef CONFIG_SYSFS
static ssize_t numa_demotion_enabled_show(struct kobject *kobj,
					  struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n",
			  numa_demotion_enabled ? "true" : "false");
}

static ssize_t numa_demotion_enabled_store(struct kobject *kobj,
					   struct kobj_attribute *attr,
					   const char *buf, size_t count)
{
	ssize_t ret;

	ret = kstrtobool(buf, &numa_demotion_enabled);
	if (ret)
		return ret;

	return count;
}

static struct kobj_attribute numa_demotion_enabled_attr =
	__ATTR(demotion_enabled, 0644, numa_demotion_enabled_show,
	       numa_demotion_enabled_store);

static struct attribute *numa_attrs[] = {
	&numa_demotion_enabled_attr.attr,
	NULL,
};

static const struct attribute_group numa_attr_group = {
	.attrs = numa_attrs,
};

static int __init numa_init_sysfs(void)
{
	int err;
	struct kobject *numa_kobj;

	numa_kobj = kobject_create_and_add("numa", mm_kobj);
	if (!numa_kobj) {
		pr_err("failed to create numa kobject\n");
		return -ENOMEM;
	}
	err = sysfs_create_group(numa_kobj, &numa_attr_group);
	if (err) {
		pr_err("failed to register numa group\n");
		goto delete_obj;
	}
	return 0;

delete_obj:
	kobject_put(numa_kobj);
	return err;
}
subsys_initcall(numa_init_sysfs);
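
/*
 * Usage note (an illustration, assuming the usual placement of mm_kobj
 * under /sys/kernel/mm): the group registered above exposes the knob
 *
 *	/sys/kernel/mm/numa/demotion_enabled
 *
 * which accepts the usual kstrtobool strings ("1"/"0", "y"/"n" and
 * similar) and gates reclaim-time demotion to lower memory tiers.
 */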
#endif /* CONFIG_SYSFS */
#endif