From: Huang Ying <ying.huang(a)intel.com>
In a system with multiple memory types, e.g. DRAM and PMEM, the CPU
and DRAM in one socket will be put in one NUMA node as before, while
the PMEM will be put in another NUMA node, as described in commit
c221c0b0308f ("device-dax: "Hotplug" persistent memory for use like
normal RAM"). So, the NUMA balancing mechanism will identify all PMEM
accesses as remote accesses and try to promote the PMEM pages to DRAM.
To distinguish the number of inter-type promoted pages from that of
inter-socket migrated pages, a new vmstat counter is added. The
counter is per-node (counted in the target node), so it can be used
to identify promotion imbalance among the NUMA nodes.
Signed-off-by: "Huang, Ying" <ying.huang(a)intel.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Wei Xu <weixugc(a)google.com>
Cc: osalvador <osalvador(a)suse.de>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Hasan Al Maruf <hasanalmaruf(a)fb.com>
Cc: linux-kernel(a)vger.kernel.org
Cc: linux-mm(a)kvack.org
Signed-off-by: zhongjiang-ali <zhongjiang-ali(a)linux.alibaba.com>
---
include/linux/mmzone.h | 3 +++
include/linux/node.h | 5 +++++
mm/migrate.c | 13 ++++++++++---
mm/vmstat.c | 3 +++
4 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d9ea502..48982e3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -206,6 +206,9 @@ enum node_stat_item {
NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */
NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
+#ifdef CONFIG_NUMA_BALANCING
+ PGPROMOTE_SUCCESS, /* promote successfully */
+#endif
NR_VM_NODE_STAT_ITEMS
};
diff --git a/include/linux/node.h b/include/linux/node.h
index 1a557c5..b92366f 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -190,4 +190,9 @@ static inline void
register_hugetlbfs_with_node(node_registration_func_t reg,
#define to_node(device) container_of(device, struct node, dev)
+static inline bool node_is_toptier(int node)
+{
+ return node_state(node, N_CPU);
+}
+
#endif /* _LINUX_NODE_H_ */
diff --git a/mm/migrate.c b/mm/migrate.c
index 2ad47b0..f86a082 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1995,6 +1995,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct
*vma,
pg_data_t *pgdat = NODE_DATA(node);
int isolated;
int nr_remaining;
+ int nr_succeeded;
LIST_HEAD(migratepages);
/*
@@ -2019,7 +2020,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct
*vma,
list_add(&page->lru, &migratepages);
nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
NULL, node, MIGRATE_ASYNC,
- MR_NUMA_MISPLACED, NULL);
+ MR_NUMA_MISPLACED, &nr_succeeded);
if (nr_remaining) {
if (!list_empty(&migratepages)) {
list_del(&page->lru);
@@ -2028,8 +2029,14 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct
*vma,
putback_lru_page(page);
}
isolated = 0;
- } else
- count_vm_numa_event(NUMA_PAGE_MIGRATE);
+ }
+
+ if (nr_succeeded) {
+ count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded);
+ if (!node_is_toptier(page_to_nid(page)) && node_is_toptier(node))
+ mod_node_page_state(NODE_DATA(node), PGPROMOTE_SUCCESS,
+ nr_succeeded);
+ }
BUG_ON(!list_empty(&migratepages));
return isolated;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c457bcc..b87b8b2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1186,6 +1186,9 @@ int fragmentation_index(struct zone *zone, unsigned int order)
"nr_dirtied",
"nr_written",
"", /* nr_indirectly_reclaimable */
+#ifdef CONFIG_NUMA_BALANCING
+ "pgpromote_success",
+#endif
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
--
1.8.3.1