Pmem mailing list archive: September 2022
pmem@lists.openanolis.cn (2 participants, 3 discussions this month)
Re: [PATCH v2] mm: make cpu-less memory node to movable node only
by Baolin Wang
On 9/1/2022 3:46 PM, Feng Tang wrote:
> From 1784d7c7a6911a3d6a93d1847d84e9297c8e4f4c Mon Sep 17 00:00:00 2001
> From: Feng Tang <feng.tang(a)intel.com>
> Date: Wed, 31 Aug 2022 15:31:04 +0800
> Subject: [PATCH v2] mm: make cpu-less memory node to movable node only
>
> When platform has PMEM node, those node are mostly a pure memory node
> without any CPU attached. To avoid performance/latency sensitive
> kernel allocation on these nodes, make all cpu-less memory nodes
> be enabled as node with movable zone only.
>
> And when users want it to be normal node, appending "cpuless_node_normal"
> to kernel cmdline parameters can achive that.
>
> Signed-off-by: Feng Tang <feng.tang(a)intel.com>
> Signed-off-by: Huaisheng Ye <huaisheng.ye(a)intel.com>

LGTM. Thanks.

Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>

> ---
> Changelog:
>
> Since v1
> * directly jump to out2 after movable handling (Baolin Wang)
>
> arch/x86/mm/srat.c | 5 +++++
> drivers/base/node.c | 2 ++
> include/linux/nodemask.h | 1 +
> mm/page_alloc.c | 32 ++++++++++++++++++++++++++++++++
> 4 files changed, 40 insertions(+)
>
> diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
> index dac07e4f5834..dcb568067d79 100644
> --- a/arch/x86/mm/srat.c
> +++ b/arch/x86/mm/srat.c
> @@ -60,6 +60,9 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
> node_set(node, numa_nodes_parsed);
> printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
> pxm, apic_id, node);
> +
> + if (!node_state(node, N_POSSIBLE_CPU))
> + node_set_state(node, N_POSSIBLE_CPU);
> }
>
> /* Callback for Proximity Domain -> LAPIC mapping */
> @@ -101,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
> node_set(node, numa_nodes_parsed);
> printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
> pxm, apic_id, node);
> + if (!node_state(node, N_POSSIBLE_CPU))
> + node_set_state(node, N_POSSIBLE_CPU);
> }
>
> int __init x86_acpi_numa_init(void)
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index 619fb1e38dd2..39ee52c69609 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -990,6 +990,7 @@ static struct node_attr node_state_attr[] = {
> #endif
> [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
> [N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
> + [N_POSSIBLE_CPU] = _NODE_ATTR(has_possible_cpu, N_POSSIBLE_CPU),
> };
>
> static struct attribute *node_state_attrs[] = {
> @@ -1001,6 +1002,7 @@ static struct attribute *node_state_attrs[] = {
> #endif
> &node_state_attr[N_MEMORY].attr.attr,
> &node_state_attr[N_CPU].attr.attr,
> + &node_state_attr[N_POSSIBLE_CPU].attr.attr,
> NULL
> };
>
> diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
> index 5a30ad594ccc..ad3100b06938 100644
> --- a/include/linux/nodemask.h
> +++ b/include/linux/nodemask.h
> @@ -399,6 +399,7 @@ enum node_states {
> #endif
> N_MEMORY, /* The node has memory(regular, high, movable) */
> N_CPU, /* The node has one or more cpus */
> + N_POSSIBLE_CPU, /* possible nodes that have cpu attached, only used in early boot phase */
> NR_NODE_STATES
> };
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 8cfce927fb9b..f20976536cfc 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -294,6 +294,7 @@ static unsigned long required_kernelcore_percent __initdata;
> static unsigned long required_movablecore __initdata;
> static unsigned long required_movablecore_percent __initdata;
> static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
> +static bool enable_cpuless_memnode_normal_node __initdata;
> static bool mirrored_kernelcore __meminitdata;
>
> /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
> @@ -7163,6 +7164,26 @@ static void __init find_zone_movable_pfns_for_nodes(void)
> required_movablecore = (totalpages * 100 * required_movablecore_percent) /
> 10000UL;
>
> + if (!required_kernelcore && !required_movablecore &&
> + !nodes_empty(node_states[N_POSSIBLE_CPU]) &&
> + !enable_cpuless_memnode_normal_node) {
> +
> + /* Put meory from cpu-less nodes into movable zones */
> + for_each_memblock(memory, r) {
> + nid = memblock_get_region_node(r);
> +
> + if (node_isset(nid, node_states[N_POSSIBLE_CPU]))
> + continue;
> +
> + usable_startpfn = PFN_DOWN(r->base);
> + zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
> + min(usable_startpfn, zone_movable_pfn[nid]) :
> + usable_startpfn;
> + }
> +
> + goto out2;
> + }
> +
> /*
> * If movablecore= was specified, calculate what size of
> * kernelcore that corresponds so that memory usable for
> @@ -7457,8 +7478,19 @@ static int __init cmdline_parse_movablecore(char *p)
> &required_movablecore_percent);
> }
>
> +/*
> + * cpuless memory nodes will be enabled to movable node by default,
> + * add this cmdline to make it be enabled as a normal node
> + */
> +static int __init cmdline_parse_cpuless_memnode(char *str)
> +{
> + enable_cpuless_memnode_normal_node = true;
> + return 0;
> +}
> +
> early_param("kernelcore", cmdline_parse_kernelcore);
> early_param("movablecore", cmdline_parse_movablecore);
> +early_param("cpuless_node_normal", cmdline_parse_cpuless_memnode);
>
> #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
>
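For reference, the v2 patch above places all memory of cpu-less nodes into ZONE_MOVABLE unless "cpuless_node_normal" is passed on the kernel command line. A minimal userspace sketch for checking the resulting zone layout after boot follows; it assumes the usual /proc/zoneinfo format ("Node N, zone NAME" headers followed by a "present" page count) and is illustrative only, not part of the patch set.

/* zoneinfo_check.c: illustrative sketch, not from the patch set.
 * Print, for every NUMA node, the zones that have non-zero "present"
 * pages, by scanning /proc/zoneinfo.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/zoneinfo", "r");
	char line[256], zone[32] = "";
	int node = -1;

	if (!f) {
		perror("fopen /proc/zoneinfo");
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		int n;
		char z[32];
		unsigned long present;

		/* Zone headers look like: "Node 1, zone  Movable" */
		if (sscanf(line, "Node %d, zone %31s", &n, z) == 2) {
			node = n;
			strcpy(zone, z);
			continue;
		}

		/* "present" is the count of physical pages backing the zone */
		if (node >= 0 && zone[0] &&
		    sscanf(line, " present %lu", &present) == 1) {
			if (present)
				printf("node %d: %-8s present %lu pages\n",
				       node, zone, present);
			zone[0] = '\0';	/* report each zone block once */
		}
	}
	fclose(f);
	return 0;
}

On a kernel carrying this patch, a cpu-less PMEM node would be expected to report present pages only in its Movable zone, while nodes with CPUs keep their DMA/DMA32/Normal zones.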
Re: [PATCH] mm: make cpu-less memory node to movable node only
by zhong jiang
On 2022/9/1 9:46 AM, Feng Tang wrote:
> Hi Zhong Jiang,
>
> Thanks for the review!
>
> On Wed, Aug 31, 2022 at 07:20:21PM +0800, zhong jiang wrote:
>> On 2022/8/31 3:31 PM, Huaisheng Ye wrote:
>>> From: Feng Tang <feng.tang(a)intel.com>
>>>
>>> When platform has PMEM node, those node are mostly a pure memory node
>>> without any CPU attached. To avoid performance/latency sensitive
>>> kernel allocation on these nodes, make all cpu-less memory nodes
>>> be enabled as node with movable zone only.
>>>
>>> And when users want it to be normal node, appending "cpuless_node_normal"
>>> to kernel cmdline parameters can achive that.
>>>
>>> Signed-off-by: Feng Tang <feng.tang(a)intel.com>
>>> Signed-off-by: Huaisheng Ye <huaisheng.ye(a)intel.com>
>>> ---
>>> arch/x86/mm/srat.c | 5 +++++
>>> drivers/base/node.c | 2 ++
>>> include/linux/nodemask.h | 1 +
>>> mm/page_alloc.c | 30 ++++++++++++++++++++++++++++++
>>> 4 files changed, 38 insertions(+)
>>>
>>> diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
>>> index dac07e4f5834..dcb568067d79 100644
>>> --- a/arch/x86/mm/srat.c
>>> +++ b/arch/x86/mm/srat.c
>>> @@ -60,6 +60,9 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
>>> node_set(node, numa_nodes_parsed);
>>> printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
>>> pxm, apic_id, node);
>>> +
>>> + if (!node_state(node, N_POSSIBLE_CPU))
>>> + node_set_state(node, N_POSSIBLE_CPU);
>>> }
>>> /* Callback for Proximity Domain -> LAPIC mapping */
>>> @@ -101,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
>>> node_set(node, numa_nodes_parsed);
>>> printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
>>> pxm, apic_id, node);
>>> + if (!node_state(node, N_POSSIBLE_CPU))
>>> + node_set_state(node, N_POSSIBLE_CPU);
>>> }
>> IIUC, pmem memory also store in the srat table, the above change seems to
>> fail to work because pmem memory has marked as the N_POSSIBLE_CPU.
> PMEM node will not be set for N_POSSIBLE_CPU, the upper 2 functions
> are only for parsing CPU entry with interrupt controllers.
>
> IIUC, you are refering the NUMA node info maintained by architectural
> code (say arch/x86): node_possible_map / numa_nodes_parsed, where PMEM
> node will be set.
>
> And the node map here N_POSSIBLE_CPU is from another set mainly for
> mm/ code, and used later in memory init part.
>
> This patch has been tested on QEMU and RE7P machines.

Thanks for your clarification.

> Thanks,
> Feng
>
>
>> Am I missing?
>>
>> Thanks,
>>> int __init x86_acpi_numa_init(void)
>>> diff --git a/drivers/base/node.c b/drivers/base/node.c
>>> index 619fb1e38dd2..39ee52c69609 100644
>>> --- a/drivers/base/node.c
>>> +++ b/drivers/base/node.c
>>> @@ -990,6 +990,7 @@ static struct node_attr node_state_attr[] = {
>>> #endif
>>> [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
>>> [N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
>>> + [N_POSSIBLE_CPU] = _NODE_ATTR(has_possible_cpu, N_POSSIBLE_CPU),
>>> };
>>> static struct attribute *node_state_attrs[] = {
>>> @@ -1001,6 +1002,7 @@ static struct attribute *node_state_attrs[] = {
>>> #endif
>>> &node_state_attr[N_MEMORY].attr.attr,
>>> &node_state_attr[N_CPU].attr.attr,
>>> + &node_state_attr[N_POSSIBLE_CPU].attr.attr,
>>> NULL
>>> };
>>> diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
>>> index 5a30ad594ccc..ad3100b06938 100644
>>> --- a/include/linux/nodemask.h
>>> +++ b/include/linux/nodemask.h
>>> @@ -399,6 +399,7 @@ enum node_states {
>>> #endif
>>> N_MEMORY, /* The node has memory(regular, high, movable) */
>>> N_CPU, /* The node has one or more cpus */
>>> + N_POSSIBLE_CPU, /* possible nodes that have cpu attached, only used in early boot phase */
>>> NR_NODE_STATES
>>> };
>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>> index 8cfce927fb9b..9570ee499c12 100644
>>> --- a/mm/page_alloc.c
>>> +++ b/mm/page_alloc.c
>>> @@ -294,6 +294,7 @@ static unsigned long required_kernelcore_percent __initdata;
>>> static unsigned long required_movablecore __initdata;
>>> static unsigned long required_movablecore_percent __initdata;
>>> static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
>>> +static bool enable_cpuless_memnode_normal_node __initdata;
>>> static bool mirrored_kernelcore __meminitdata;
>>> /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
>>> @@ -7163,6 +7164,24 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>>> required_movablecore = (totalpages * 100 * required_movablecore_percent) /
>>> 10000UL;
>>> + if (!required_kernelcore && !required_movablecore &&
>>> + !nodes_empty(node_states[N_POSSIBLE_CPU]) &&
>>> + !enable_cpuless_memnode_normal_node) {
>>> +
>>> + /* Put meory from cpu-less nodes into movable zones */
>>> + for_each_memblock(memory, r) {
>>> + nid = memblock_get_region_node(r);
>>> +
>>> + if (node_isset(nid, node_states[N_POSSIBLE_CPU]))
>>> + continue;
>>> +
>>> + usable_startpfn = PFN_DOWN(r->base);
>>> + zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
>>> + min(usable_startpfn, zone_movable_pfn[nid]) :
>>> + usable_startpfn;
>>> + }
>>> + }
>>> +
>>> /*
>>> * If movablecore= was specified, calculate what size of
>>> * kernelcore that corresponds so that memory usable for
>>> @@ -7457,8 +7476,19 @@ static int __init cmdline_parse_movablecore(char *p)
>>> &required_movablecore_percent);
>>> }
>>> +/*
>>> + * cpuless memory nodes will be enabled to movable node by default,
>>> + * add this cmdline to make it be enabled as a normal node
>>> + */
>>> +static int __init cmdline_parse_cpuless_memnode(char *str)
>>> +{
>>> + enable_cpuless_memnode_normal_node = true;
>>> + return 0;
>>> +}
>>> +
>>> early_param("kernelcore", cmdline_parse_kernelcore);
>>> early_param("movablecore", cmdline_parse_movablecore);
>>> +early_param("cpuless_node_normal", cmdline_parse_cpuless_memnode);
>>> #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
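The distinction Feng Tang describes above (numa_nodes_parsed/node_possible_map on the arch side versus the new N_POSSIBLE_CPU mask for mm/ code) can also be observed from userspace through the node-state masks exported under /sys/devices/system/node. "possible", "has_memory" and "has_cpu" are standard attributes matching the node_state_attr table in the quoted node.c hunk; "has_possible_cpu" only exists on a kernel carrying this patch. A small illustrative sketch that dumps them:

/* node_masks.c: illustrative sketch, not from the patch set.
 * Dump the sysfs node-state masks so cpu-less (e.g. PMEM) nodes can be
 * compared against the nodes that have a CPU affinity entry in the SRAT.
 */
#include <stdio.h>

static void dump_mask(const char *name)
{
	char path[128], buf[4096];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/devices/system/node/%s", name);
	f = fopen(path, "r");
	if (!f) {
		printf("%-16s <not available>\n", name);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%-16s %s", name, buf);	/* buf keeps its newline */
	fclose(f);
}

int main(void)
{
	dump_mask("possible");		/* all nodes known to firmware */
	dump_mask("has_memory");	/* nodes with memory attached (N_MEMORY) */
	dump_mask("has_cpu");		/* nodes with CPUs (N_CPU) */
	dump_mask("has_possible_cpu");	/* exported only with this patch applied */
	return 0;
}

A node that appears in "possible" and "has_memory" but not in "has_possible_cpu" is exactly the kind of cpu-less node the patch pushes into ZONE_MOVABLE.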
[PATCH] mm: make cpu-less memory node to movable node only
by Huaisheng Ye
From: Feng Tang <feng.tang(a)intel.com>

When platform has PMEM node, those node are mostly a pure memory node
without any CPU attached. To avoid performance/latency sensitive
kernel allocation on these nodes, make all cpu-less memory nodes
be enabled as node with movable zone only.

And when users want it to be normal node, appending "cpuless_node_normal"
to kernel cmdline parameters can achive that.

Signed-off-by: Feng Tang <feng.tang(a)intel.com>
Signed-off-by: Huaisheng Ye <huaisheng.ye(a)intel.com>
---
arch/x86/mm/srat.c | 5 +++++
drivers/base/node.c | 2 ++
include/linux/nodemask.h | 1 +
mm/page_alloc.c | 30 ++++++++++++++++++++++++++++++
4 files changed, 38 insertions(+)

diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index dac07e4f5834..dcb568067d79 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -60,6 +60,9 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
+
+ if (!node_state(node, N_POSSIBLE_CPU))
+ node_set_state(node, N_POSSIBLE_CPU);
}

/* Callback for Proximity Domain -> LAPIC mapping */
@@ -101,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
+ if (!node_state(node, N_POSSIBLE_CPU))
+ node_set_state(node, N_POSSIBLE_CPU);
}

int __init x86_acpi_numa_init(void)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 619fb1e38dd2..39ee52c69609 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -990,6 +990,7 @@ static struct node_attr node_state_attr[] = {
#endif
[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
+ [N_POSSIBLE_CPU] = _NODE_ATTR(has_possible_cpu, N_POSSIBLE_CPU),
};

static struct attribute *node_state_attrs[] = {
@@ -1001,6 +1002,7 @@ static struct attribute *node_state_attrs[] = {
#endif
&node_state_attr[N_MEMORY].attr.attr,
&node_state_attr[N_CPU].attr.attr,
+ &node_state_attr[N_POSSIBLE_CPU].attr.attr,
NULL
};

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 5a30ad594ccc..ad3100b06938 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -399,6 +399,7 @@ enum node_states {
#endif
N_MEMORY, /* The node has memory(regular, high, movable) */
N_CPU, /* The node has one or more cpus */
+ N_POSSIBLE_CPU, /* possible nodes that have cpu attached, only used in early boot phase */
NR_NODE_STATES
};

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8cfce927fb9b..9570ee499c12 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -294,6 +294,7 @@ static unsigned long required_kernelcore_percent __initdata;
static unsigned long required_movablecore __initdata;
static unsigned long required_movablecore_percent __initdata;
static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
+static bool enable_cpuless_memnode_normal_node __initdata;
static bool mirrored_kernelcore __meminitdata;

/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
@@ -7163,6 +7164,24 @@ static void __init find_zone_movable_pfns_for_nodes(void)
required_movablecore = (totalpages * 100 * required_movablecore_percent) /
10000UL;
+ if (!required_kernelcore && !required_movablecore &&
+ !nodes_empty(node_states[N_POSSIBLE_CPU]) &&
+ !enable_cpuless_memnode_normal_node) {
+
+ /* Put meory from cpu-less nodes into movable zones */
+ for_each_memblock(memory, r) {
+ nid = memblock_get_region_node(r);
+
+ if (node_isset(nid, node_states[N_POSSIBLE_CPU]))
+ continue;
+
+ usable_startpfn = PFN_DOWN(r->base);
+ zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
+ min(usable_startpfn, zone_movable_pfn[nid]) :
+ usable_startpfn;
+ }
+ }
+
/*
* If movablecore= was specified, calculate what size of
* kernelcore that corresponds so that memory usable for
@@ -7457,8 +7476,19 @@ static int __init cmdline_parse_movablecore(char *p)
&required_movablecore_percent);
}

+/*
+ * cpuless memory nodes will be enabled to movable node by default,
+ * add this cmdline to make it be enabled as a normal node
+ */
+static int __init cmdline_parse_cpuless_memnode(char *str)
+{
+ enable_cpuless_memnode_normal_node = true;
+ return 0;
+}
+
early_param("kernelcore", cmdline_parse_kernelcore);
early_param("movablecore", cmdline_parse_movablecore);
+early_param("cpuless_node_normal", cmdline_parse_cpuless_memnode);

#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
--
2.27.0
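The patch targets nodes that have memory but no CPUs. Such nodes can be listed from userspace with the standard sysfs masks /sys/devices/system/node/has_memory and has_cpu, the same attribute names used in the drivers/base/node.c hunk above. The following sketch is illustrative only; it parses the "0-1,3"-style node lists and reports nodes present in has_memory but absent from has_cpu.

/* cpuless_nodes.c: illustrative sketch, not from the patch set.
 * List NUMA nodes that have memory but no CPUs; on PMEM systems these
 * are the nodes this patch turns into movable-only nodes by default.
 */
#include <stdio.h>
#include <string.h>

#define MAX_NODES 1024

/* Parse a sysfs node list such as "0-1,3" into a presence bitmap. */
static int parse_nodelist(const char *path, unsigned char *map)
{
	char buf[4096];
	FILE *f = fopen(path, "r");

	if (!f || !fgets(buf, sizeof(buf), f)) {
		if (f)
			fclose(f);
		return -1;
	}
	fclose(f);

	for (char *tok = strtok(buf, ",\n"); tok; tok = strtok(NULL, ",\n")) {
		int lo, hi;

		if (sscanf(tok, "%d-%d", &lo, &hi) != 2) {
			if (sscanf(tok, "%d", &lo) != 1)
				continue;
			hi = lo;
		}
		for (int i = lo; i <= hi && i < MAX_NODES; i++)
			map[i] = 1;
	}
	return 0;
}

int main(void)
{
	unsigned char has_mem[MAX_NODES] = { 0 }, has_cpu[MAX_NODES] = { 0 };

	if (parse_nodelist("/sys/devices/system/node/has_memory", has_mem) ||
	    parse_nodelist("/sys/devices/system/node/has_cpu", has_cpu)) {
		fprintf(stderr, "failed to read sysfs node masks\n");
		return 1;
	}

	for (int i = 0; i < MAX_NODES; i++)
		if (has_mem[i] && !has_cpu[i])
			printf("node %d: memory but no CPUs (movable-only candidate)\n", i);

	return 0;
}

With the patch applied and no kernelcore=/movablecore= options given, memory on the nodes printed here goes to ZONE_MOVABLE; booting with "cpuless_node_normal" restores the previous behavior of treating them as normal nodes.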