On 2022/8/31 3:31 下午, Huaisheng Ye wrote:
From: Feng Tang <feng.tang(a)intel.com>
When a platform has PMEM nodes, those nodes are mostly pure memory nodes
without any CPU attached. To avoid performance/latency-sensitive
kernel allocations on these nodes, make all cpu-less memory nodes
be enabled as nodes with a movable zone only.
When users want such a node to be a normal node, appending "cpuless_node_normal"
to the kernel cmdline parameters achieves that.
Signed-off-by: Feng Tang <feng.tang(a)intel.com>
Signed-off-by: Huaisheng Ye <huaisheng.ye(a)intel.com>
---
arch/x86/mm/srat.c | 5 +++++
drivers/base/node.c | 2 ++
include/linux/nodemask.h | 1 +
mm/page_alloc.c | 30 ++++++++++++++++++++++++++++++
4 files changed, 38 insertions(+)
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index dac07e4f5834..dcb568067d79 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -60,6 +60,9 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity
*pa)
node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
+
+ if (!node_state(node, N_POSSIBLE_CPU))
+ node_set_state(node, N_POSSIBLE_CPU);
}
/* Callback for Proximity Domain -> LAPIC mapping */
@@ -101,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity
*pa)
node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
+ if (!node_state(node, N_POSSIBLE_CPU))
+ node_set_state(node, N_POSSIBLE_CPU);
}
IIUC, pmem memory is also described in the SRAT table, so the above change
seems to fail to work because the pmem memory has also been marked as
N_POSSIBLE_CPU. Am I missing something?
Thanks,
int __init x86_acpi_numa_init(void)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 619fb1e38dd2..39ee52c69609 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -990,6 +990,7 @@ static struct node_attr node_state_attr[] = {
#endif
[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
+ [N_POSSIBLE_CPU] = _NODE_ATTR(has_possible_cpu, N_POSSIBLE_CPU),
};
static struct attribute *node_state_attrs[] = {
@@ -1001,6 +1002,7 @@ static struct attribute *node_state_attrs[] = {
#endif
&node_state_attr[N_MEMORY].attr.attr,
&node_state_attr[N_CPU].attr.attr,
+ &node_state_attr[N_POSSIBLE_CPU].attr.attr,
NULL
};
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 5a30ad594ccc..ad3100b06938 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -399,6 +399,7 @@ enum node_states {
#endif
N_MEMORY, /* The node has memory(regular, high, movable) */
N_CPU, /* The node has one or more cpus */
+ N_POSSIBLE_CPU, /* possible nodes that have cpu attached, only used in early
boot phase */
NR_NODE_STATES
};
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8cfce927fb9b..9570ee499c12 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -294,6 +294,7 @@ static unsigned long required_kernelcore_percent __initdata;
static unsigned long required_movablecore __initdata;
static unsigned long required_movablecore_percent __initdata;
static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
+static bool enable_cpuless_memnode_normal_node __initdata;
static bool mirrored_kernelcore __meminitdata;
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
@@ -7163,6 +7164,24 @@ static void __init find_zone_movable_pfns_for_nodes(void)
required_movablecore = (totalpages * 100 * required_movablecore_percent) /
10000UL;
+ if (!required_kernelcore && !required_movablecore &&
+ !nodes_empty(node_states[N_POSSIBLE_CPU]) &&
+ !enable_cpuless_memnode_normal_node) {
+
+ /* Put memory from cpu-less nodes into movable zones */
+ for_each_memblock(memory, r) {
+ nid = memblock_get_region_node(r);
+
+ if (node_isset(nid, node_states[N_POSSIBLE_CPU]))
+ continue;
+
+ usable_startpfn = PFN_DOWN(r->base);
+ zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
+ min(usable_startpfn, zone_movable_pfn[nid]) :
+ usable_startpfn;
+ }
+ }
+
/*
* If movablecore= was specified, calculate what size of
* kernelcore that corresponds so that memory usable for
@@ -7457,8 +7476,19 @@ static int __init cmdline_parse_movablecore(char *p)
&required_movablecore_percent);
}
+/*
+ * Early-param handler for the cpuless_node_normal kernel cmdline option.
+ *
+ * By default, memory on cpu-less nodes is placed in the movable zone only.
+ * Passing this option sets enable_cpuless_memnode_normal_node, which turns
+ * that default off so that cpu-less memory nodes are enabled as normal nodes.
+ *
+ * The option takes no value: @str is ignored. Always returns 0.
+ */
+static int __init cmdline_parse_cpuless_memnode(char *str)
+{
+ enable_cpuless_memnode_normal_node = true;
+ return 0;
+}
+
early_param("kernelcore", cmdline_parse_kernelcore);
early_param("movablecore", cmdline_parse_movablecore);
+early_param("cpuless_node_normal", cmdline_parse_cpuless_memnode);
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */