On 9/1/2022 3:46 PM, Feng Tang wrote:
From 1784d7c7a6911a3d6a93d1847d84e9297c8e4f4c Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang(a)intel.com>
Date: Wed, 31 Aug 2022 15:31:04 +0800
Subject: [PATCH v2] mm: make cpu-less memory node to movable node only
When a platform has PMEM nodes, those nodes are mostly pure memory nodes
without any CPU attached. To avoid performance/latency sensitive
kernel allocations on these nodes, make all cpu-less memory nodes
be set up as nodes with a movable zone only.
When users want such a node to be a normal node, appending "cpuless_node_normal"
to the kernel cmdline parameters achieves that.
Signed-off-by: Feng Tang <feng.tang(a)intel.com>
Signed-off-by: Huaisheng Ye <huaisheng.ye(a)intel.com>
LGTM. Thanks.
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
---
Changelog:
Since v1
* directly jump to out2 after movable handling (Baolin Wang)
arch/x86/mm/srat.c | 5 +++++
drivers/base/node.c | 2 ++
include/linux/nodemask.h | 1 +
mm/page_alloc.c | 32 ++++++++++++++++++++++++++++++++
4 files changed, 40 insertions(+)
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index dac07e4f5834..dcb568067d79 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -60,6 +60,9 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
+
+ if (!node_state(node, N_POSSIBLE_CPU))
+ node_set_state(node, N_POSSIBLE_CPU);
}
/* Callback for Proximity Domain -> LAPIC mapping */
@@ -101,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
+ if (!node_state(node, N_POSSIBLE_CPU))
+ node_set_state(node, N_POSSIBLE_CPU);
}
int __init x86_acpi_numa_init(void)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 619fb1e38dd2..39ee52c69609 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -990,6 +990,7 @@ static struct node_attr node_state_attr[] = {
#endif
[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
+ [N_POSSIBLE_CPU] = _NODE_ATTR(has_possible_cpu, N_POSSIBLE_CPU),
};
static struct attribute *node_state_attrs[] = {
@@ -1001,6 +1002,7 @@ static struct attribute *node_state_attrs[] = {
#endif
&node_state_attr[N_MEMORY].attr.attr,
&node_state_attr[N_CPU].attr.attr,
+ &node_state_attr[N_POSSIBLE_CPU].attr.attr,
NULL
};
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 5a30ad594ccc..ad3100b06938 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -399,6 +399,7 @@ enum node_states {
#endif
N_MEMORY, /* The node has memory(regular, high, movable) */
N_CPU, /* The node has one or more cpus */
+ N_POSSIBLE_CPU, /* possible nodes that have cpu attached, only used in early boot phase */
NR_NODE_STATES
};
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8cfce927fb9b..f20976536cfc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -294,6 +294,7 @@ static unsigned long required_kernelcore_percent __initdata;
static unsigned long required_movablecore __initdata;
static unsigned long required_movablecore_percent __initdata;
static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
+static bool enable_cpuless_memnode_normal_node __initdata;
static bool mirrored_kernelcore __meminitdata;
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
@@ -7163,6 +7164,26 @@ static void __init find_zone_movable_pfns_for_nodes(void)
required_movablecore = (totalpages * 100 * required_movablecore_percent) /
10000UL;
+ if (!required_kernelcore && !required_movablecore &&
+ !nodes_empty(node_states[N_POSSIBLE_CPU]) &&
+ !enable_cpuless_memnode_normal_node) {
+
+ /* Put memory from cpu-less nodes into movable zones */
+ for_each_memblock(memory, r) {
+ nid = memblock_get_region_node(r);
+
+ if (node_isset(nid, node_states[N_POSSIBLE_CPU]))
+ continue;
+
+ usable_startpfn = PFN_DOWN(r->base);
+ zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
+ min(usable_startpfn, zone_movable_pfn[nid]) :
+ usable_startpfn;
+ }
+
+ goto out2;
+ }
+
/*
* If movablecore= was specified, calculate what size of
* kernelcore that corresponds so that memory usable for
@@ -7457,8 +7478,19 @@ static int __init cmdline_parse_movablecore(char *p)
&required_movablecore_percent);
}
+/*
+ * By default, cpu-less memory nodes are set up as movable-only nodes;
+ * pass this cmdline parameter to have them set up as normal nodes.
+ */
+static int __init cmdline_parse_cpuless_memnode(char *str)
+{
+ enable_cpuless_memnode_normal_node = true;
+ return 0;
+}
+
early_param("kernelcore", cmdline_parse_kernelcore);
early_param("movablecore", cmdline_parse_movablecore);
+early_param("cpuless_node_normal", cmdline_parse_cpuless_memnode);
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */