On 9/1/2022 3:46 PM, Feng Tang wrote:
   From 1784d7c7a6911a3d6a93d1847d84e9297c8e4f4c Mon Sep 17 00:00:00 2001
 From: Feng Tang <feng.tang(a)intel.com>
 Date: Wed, 31 Aug 2022 15:31:04 +0800
 Subject: [PATCH v2] mm: make cpu-less memory node to movable node only
 
 When a platform has PMEM nodes, those nodes are mostly pure memory nodes
 without any CPU attached. To avoid performance/latency-sensitive
 kernel allocations on these nodes, set up all cpu-less memory nodes
 as nodes with a movable zone only.
 
 When users want such a node to be a normal node, appending "cpuless_node_normal"
 to the kernel cmdline parameters achieves that.
 
 Signed-off-by: Feng Tang <feng.tang(a)intel.com>
 Signed-off-by: Huaisheng Ye <huaisheng.ye(a)intel.com> 
LGTM. Thanks.
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
  ---
 Changelog:
 
      Since v1
      * directly jump to out2 after movable handling (Baolin Wang)
 
   arch/x86/mm/srat.c       |  5 +++++
   drivers/base/node.c      |  2 ++
   include/linux/nodemask.h |  1 +
   mm/page_alloc.c          | 32 ++++++++++++++++++++++++++++++++
   4 files changed, 40 insertions(+)
 
 diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
 index dac07e4f5834..dcb568067d79 100644
 --- a/arch/x86/mm/srat.c
 +++ b/arch/x86/mm/srat.c
 @@ -60,6 +60,9 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
   	node_set(node, numa_nodes_parsed);
   	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
   	       pxm, apic_id, node);
 +
 +	if (!node_state(node, N_POSSIBLE_CPU))
 +		node_set_state(node, N_POSSIBLE_CPU);
   }
   
   /* Callback for Proximity Domain -> LAPIC mapping */
 @@ -101,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
   	node_set(node, numa_nodes_parsed);
   	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
   	       pxm, apic_id, node);
 +	if (!node_state(node, N_POSSIBLE_CPU))
 +		node_set_state(node, N_POSSIBLE_CPU);
   }
   
   int __init x86_acpi_numa_init(void)
 diff --git a/drivers/base/node.c b/drivers/base/node.c
 index 619fb1e38dd2..39ee52c69609 100644
 --- a/drivers/base/node.c
 +++ b/drivers/base/node.c
 @@ -990,6 +990,7 @@ static struct node_attr node_state_attr[] = {
   #endif
   	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
   	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
 +	[N_POSSIBLE_CPU] = _NODE_ATTR(has_possible_cpu, N_POSSIBLE_CPU),
   };
   
   static struct attribute *node_state_attrs[] = {
 @@ -1001,6 +1002,7 @@ static struct attribute *node_state_attrs[] = {
   #endif
   	&node_state_attr[N_MEMORY].attr.attr,
   	&node_state_attr[N_CPU].attr.attr,
 +	&node_state_attr[N_POSSIBLE_CPU].attr.attr,
   	NULL
   };
   
 diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
 index 5a30ad594ccc..ad3100b06938 100644
 --- a/include/linux/nodemask.h
 +++ b/include/linux/nodemask.h
 @@ -399,6 +399,7 @@ enum node_states {
   #endif
   	N_MEMORY,		/* The node has memory(regular, high, movable) */
   	N_CPU,		/* The node has one or more cpus */
 +	N_POSSIBLE_CPU,         /* possible nodes that have cpu attached, only used in early boot phase */
   	NR_NODE_STATES
   };
   
 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
 index 8cfce927fb9b..f20976536cfc 100644
 --- a/mm/page_alloc.c
 +++ b/mm/page_alloc.c
 @@ -294,6 +294,7 @@ static unsigned long required_kernelcore_percent __initdata;
   static unsigned long required_movablecore __initdata;
   static unsigned long required_movablecore_percent __initdata;
   static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
 +static bool enable_cpuless_memnode_normal_node __initdata;
   static bool mirrored_kernelcore __meminitdata;
   
   /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 @@ -7163,6 +7164,26 @@ static void __init find_zone_movable_pfns_for_nodes(void)
   		required_movablecore = (totalpages * 100 * required_movablecore_percent) /
   					10000UL;
   
 +	if (!required_kernelcore && !required_movablecore &&
 +		!nodes_empty(node_states[N_POSSIBLE_CPU]) &&
 +		!enable_cpuless_memnode_normal_node) {
 +
 +		/* Put memory from cpu-less nodes into movable zones */
 +		for_each_memblock(memory, r) {
 +			nid = memblock_get_region_node(r);
 +
 +			if (node_isset(nid, node_states[N_POSSIBLE_CPU]))
 +				continue;
 +
 +			usable_startpfn = PFN_DOWN(r->base);
 +			zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
 +				min(usable_startpfn, zone_movable_pfn[nid]) :
 +				usable_startpfn;
 +		}
 +
 +		goto out2;
 +	}
 +
   	/*
   	 * If movablecore= was specified, calculate what size of
   	 * kernelcore that corresponds so that memory usable for
 @@ -7457,8 +7478,19 @@ static int __init cmdline_parse_movablecore(char *p)
   				  &required_movablecore_percent);
   }
   
 +/*
 + * By default, cpu-less memory nodes are set up as movable-only nodes;
 + * pass "cpuless_node_normal" on the cmdline to keep them as normal nodes.
 + */
 +static int __init cmdline_parse_cpuless_memnode(char *str)
 +{
 +	enable_cpuless_memnode_normal_node = true;
 +	return 0;
 +}
 +
   early_param("kernelcore", cmdline_parse_kernelcore);
   early_param("movablecore", cmdline_parse_movablecore);
 +early_param("cpuless_node_normal", cmdline_parse_cpuless_memnode);
   
   #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */