From: Yang Yingliang <yangyingliang(a)huawei.com>
commit 1abcabe9dab59eca2890c65ca81af0354c13bcd3 openEuler-1.0
hulk inclusion
category: feature
bugzilla: 5510
CVE: NA
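
Add an initial arm64 MPAM (Memory system performance resource
Partitioning and Monitoring) resctrl skeleton behind CONFIG_MPAM and
CONFIG_RESCTRL_AARCH64:

 - resource descriptions for the L2 and L3 caches, including the
   CODE/DATA variants used for CDP
 - a trivial global CLOSID allocator and RMID allocation stubs
 - the basic resctrl group files: cpus, cpus_list, tasks, schemata
   and info/last_cmd_status
 - cpu hotplug callbacks that currently only log the event

Most handlers are stubs; the actual MPAM hardware programming is left
to follow-up patches. Assuming the filesystem is mounted at the usual
resctrl mount point (as on x86), the files added here should appear as:

  /sys/fs/resctrl/info/last_cmd_status
  /sys/fs/resctrl/<group>/cpus
  /sys/fs/resctrl/<group>/cpus_list
  /sys/fs/resctrl/<group>/tasks
  /sys/fs/resctrl/<group>/schemata
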
Signed-off-by: Xie XiuQi <xiexiuqi(a)huawei.com>
Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Signed-off-by: Xin Hao <haoxing990(a)gmail.com>
---
arch/arm64/Kconfig | 8 +
arch/arm64/include/asm/mpam.h | 137 ++++++
arch/arm64/include/asm/mpam_sched.h | 33 ++
arch/arm64/include/asm/resctrl.h | 56 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/mpam.c | 712 ++++++++++++++++++++++++++++
arch/arm64/kernel/mpam_ctrlmon.c | 45 ++
arch/arm64/kernel/mpam_mon.c | 51 ++
fs/Kconfig | 9 +
fs/resctrlfs.c | 26 +-
include/linux/sched.h | 2 +-
11 files changed, 1068 insertions(+), 12 deletions(-)
create mode 100644 arch/arm64/include/asm/mpam.h
create mode 100644 arch/arm64/include/asm/mpam_sched.h
create mode 100644 arch/arm64/include/asm/resctrl.h
create mode 100644 arch/arm64/kernel/mpam.c
create mode 100644 arch/arm64/kernel/mpam_ctrlmon.c
create mode 100644 arch/arm64/kernel/mpam_mon.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8f77da2fa0e9..f7a8d2743b61 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -792,6 +792,14 @@ config HOTPLUG_CPU
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
+config MPAM
+ bool "Support Memory Partitioning and Monitoring"
+ default n
+ select RESCTRL_AARCH64
+ help
+ Say Y to enable Memory Partitioning and Monitoring (MPAM), more
+ precisely Memory system performance resource Partitioning and
+ Monitoring.
+
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h
new file mode 100644
index 000000000000..dd86be45ccc2
--- /dev/null
+++ b/arch/arm64/include/asm/mpam.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_MPAM_H
+#define _ASM_ARM64_MPAM_H
+
+#include <linux/sched.h>
+#include <linux/kernfs.h>
+#include <linux/jump_label.h>
+
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+
+DECLARE_STATIC_KEY_FALSE(resctrl_enable_key);
+DECLARE_STATIC_KEY_FALSE(resctrl_mon_enable_key);
+
+extern bool rdt_alloc_capable;
+extern bool rdt_mon_capable;
+
+enum rdt_group_type {
+ RDTCTRL_GROUP = 0,
+ RDTMON_GROUP,
+ RDT_NUM_GROUP,
+};
+
+/**
+ * struct mongroup - store mon group's data in resctrl fs.
+ * @mon_data_kn: kernfs node for the mon_data directory
+ * @parent: parent rdtgrp
+ * @crdtgrp_list: child rdtgroup node list
+ * @rmid: rmid for this rdtgroup
+ */
+struct mongroup {
+ struct kernfs_node *mon_data_kn;
+ struct rdtgroup *parent;
+ struct list_head crdtgrp_list;
+ u32 rmid;
+};
+
+/**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn: kernfs node
+ * @resctrl_group_list: linked list for all rdtgroups
+ * @closid: closid for this rdtgroup
+ * @cpu_mask: CPUs assigned to this rdtgroup
+ * @flags: status bits
+ * @waitcount: how many cpus expect to find this
+ * group when they acquire resctrl_group_mutex
+ * @type: indicates type of this rdtgroup - either
+ * monitor only or ctrl_mon group
+ * @mon: mongroup related data
+ */
+struct rdtgroup {
+ struct kernfs_node *kn;
+ struct list_head resctrl_group_list;
+ u32 closid;
+ struct cpumask cpu_mask;
+ int flags;
+ atomic_t waitcount;
+ enum rdt_group_type type;
+ struct mongroup mon;
+};
+
+/* rdtgroup.flags */
+#define RDT_DELETED 1
+
+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list: all instances of this resource
+ * @id: unique id for this instance
+ * @cpu_mask: which cpus share this resource
+ */
+struct rdt_domain {
+ struct list_head list;
+ int id;
+ struct cpumask cpu_mask;
+};
+
+extern struct mutex resctrl_group_mutex;
+
+extern struct resctrl_resource resctrl_resources_all[];
+
+int __init resctrl_group_init(void);
+
+enum {
+ MPAM_RESOURCE_L3,
+ MPAM_RESOURCE_L3DATA,
+ MPAM_RESOURCE_L3CODE,
+ MPAM_RESOURCE_L2,
+ MPAM_RESOURCE_L2DATA,
+ MPAM_RESOURCE_L2CODE,
+
+ /* Must be the last */
+ MPAM_NUM_RESOURCES,
+};
+
+void rdt_last_cmd_clear(void);
+void rdt_last_cmd_puts(const char *s);
+void rdt_last_cmd_printf(const char *fmt, ...);
+
+int alloc_rmid(void);
+void free_rmid(u32 rmid);
+int resctrl_group_mondata_show(struct seq_file *m, void *arg);
+void rmdir_mondata_subdir_allrdtgrp(struct resctrl_resource *r,
+ unsigned int dom_id);
+void mkdir_mondata_subdir_allrdtgrp(struct resctrl_resource *r,
+ struct rdt_domain *d);
+
+void closid_init(void);
+int closid_alloc(void);
+void closid_free(int closid);
+
+int cdp_enable(int level, int data_type, int code_type);
+void resctrl_resource_reset(void);
+void release_rdtgroupfs_options(void);
+int parse_rdtgroupfs_options(char *data);
+
+static inline int __resctrl_group_show_options(struct seq_file *seq)
+{
+ if (resctrl_resources_all[MPAM_RESOURCE_L3DATA].alloc_enabled)
+ seq_puts(seq, ",cdp");
+ return 0;
+}
+
+void post_resctrl_mount(void);
+#endif /* _ASM_ARM64_MPAM_H */
diff --git a/arch/arm64/include/asm/mpam_sched.h b/arch/arm64/include/asm/mpam_sched.h
new file mode 100644
index 000000000000..5978e98212b9
--- /dev/null
+++ b/arch/arm64/include/asm/mpam_sched.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_ARM64_MPAM_SCHED_H
+#define _ASM_ARM64_MPAM_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/jump_label.h>
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @cur_rmid: The cached Resource Monitoring ID
+ * @cur_closid: The cached Class Of Service ID
+ * @default_rmid: The user assigned Resource Monitoring ID
+ * @default_closid: The user assigned cached Class Of Service ID
+ *
+ * The upper 32 bits of IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them. This also
+ * stores the user configured per cpu CLOSID and RMID.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+ u32 cur_rmid;
+ u32 cur_closid;
+ u32 default_rmid;
+ u32 default_closid;
+};
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
+
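+/* Stub: context-switch handling for MPAM is not implemented yet. */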
+static inline void mpam_sched_in(void) {}
+
+#endif
diff --git a/arch/arm64/include/asm/resctrl.h b/arch/arm64/include/asm/resctrl.h
new file mode 100644
index 000000000000..b8c983fb68ce
--- /dev/null
+++ b/arch/arm64/include/asm/resctrl.h
@@ -0,0 +1,56 @@
+#ifndef _ASM_ARM64_RESCTRL_H
+#define _ASM_ARM64_RESCTRL_H
+
+#include <asm/mpam_sched.h>
+#include <asm/mpam.h>
+
+#define resctrl_group rdtgroup
+#define resctrl_alloc_capable rdt_alloc_capable
+#define resctrl_mon_capable rdt_mon_capable
+
+static inline int alloc_mon_id(void)
+{
+ return alloc_rmid();
+}
+
+static inline void free_mon_id(u32 id)
+{
+ free_rmid(id);
+}
+
+static inline void resctrl_id_init(void)
+{
+ closid_init();
+}
+
+static inline int resctrl_id_alloc(void)
+{
+ return closid_alloc();
+}
+
+static inline void resctrl_id_free(int id)
+{
+ closid_free(id);
+}
+
+void update_cpu_closid_rmid(void *info);
+void update_closid_rmid(const struct cpumask *cpu_mask, struct resctrl_group *r);
+int __resctrl_group_move_task(struct task_struct *tsk,
+ struct resctrl_group *rdtgrp);
+
+ssize_t resctrl_group_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+
+int resctrl_group_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v);
+
+#define release_resctrl_group_fs_options release_rdtgroupfs_options
+#define parse_resctrl_group_fs_options parse_rdtgroupfs_options
+
+#define for_each_resctrl_resource(r) \
+ for (r = resctrl_resources_all; \
+ r < resctrl_resources_all + MPAM_NUM_RESOURCES; \
+ r++)
+
+#endif /* _ASM_ARM64_RESCTRL_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 973cc777e7e4..c6b1ed993891 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -58,6 +58,7 @@ arm64-obj-$(CONFIG_CRASH_CORE) += crash_core.o
arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o
arm64-obj-$(CONFIG_SDEI_WATCHDOG) += watchdog_sdei.o
+arm64-obj-$(CONFIG_MPAM) += mpam.o mpam_ctrlmon.o mpam_mon.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c
new file mode 100644
index 000000000000..d56203bcfb68
--- /dev/null
+++ b/arch/arm64/kernel/mpam.c
@@ -0,0 +1,712 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Cache Allocation code.
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Authors:
+ * Fenghua Yu <fenghua.yu(a)intel.com>
+ * Tony Luck <tony.luck(a)intel.com>
+ * Vikas Shivappa <vikas.shivappa(a)intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT can be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/cacheinfo.h>
+#include <linux/cpuhotplug.h>
+#include <linux/task_work.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/resctrlfs.h>
+
+#include <asm/mpam_sched.h>
+#include <asm/resctrl.h>
+
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(resctrl_group_mutex);
+
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Functions which modify the state
+ * are called with interrupts disabled and no preemption, which
+ * is sufficient for the protection.
+ */
+DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+/*
+ * Used to store the max resource name width and max resource data width
+ * to display the schemata in a tabular format
+ */
+int max_name_width, max_data_width;
+
+/*
+ * Global boolean for rdt_alloc which is true if any
+ * resource allocation is enabled.
+ */
+bool rdt_alloc_capable;
+
+#define domain_init(id) LIST_HEAD_INIT(resctrl_resources_all[id].domains)
+
+struct resctrl_resource resctrl_resources_all[] = {
+ [MPAM_RESOURCE_L3] = {
+ .rid = MPAM_RESOURCE_L3,
+ .name = "L3",
+ .domains = domain_init(MPAM_RESOURCE_L3),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L3DATA] = {
+ .rid = MPAM_RESOURCE_L3DATA,
+ .name = "L3DATA",
+ .domains = domain_init(MPAM_RESOURCE_L3DATA),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L3CODE] = {
+ .rid = MPAM_RESOURCE_L3CODE,
+ .name = "L3CODE",
+ .domains = domain_init(MPAM_RESOURCE_L3CODE),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L2] = {
+ .rid = MPAM_RESOURCE_L2,
+ .name = "L2",
+ .domains = domain_init(MPAM_RESOURCE_L2),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L2DATA] = {
+ .rid = MPAM_RESOURCE_L2DATA,
+ .name = "L2DATA",
+ .domains = domain_init(MPAM_RESOURCE_L2DATA),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L2CODE] = {
+ .rid = MPAM_RESOURCE_L2CODE,
+ .name = "L2CODE",
+ .domains = domain_init(MPAM_RESOURCE_L2CODE),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+};
+
+static void rdt_get_cache_alloc_cfg(int idx, struct resctrl_resource *r)
+{
+ r->alloc_capable = true;
+ r->alloc_enabled = true;
+}
+
+/*
+ * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
+ * we can keep a bitmap of free CLOSIDs in a single integer.
+ *
+ * Using a global CLOSID across all resources has some advantages and
+ * some drawbacks:
+ * + We can simply set "current->closid" to assign a task to a resource
+ * group.
+ * + Context switch code can avoid extra memory references deciding which
+ * CLOSID to load into the PQR_ASSOC MSR
+ * - We give up some options in configuring resource groups across multi-socket
+ * systems.
+ * - Our choices on how to configure each resource become progressively more
+ * limited as the number of resources grows.
+ */
+static int closid_free_map;
+
+void closid_init(void)
+{
+ int resctrl_min_closid = 32;
+
+ closid_free_map = BIT_MASK(resctrl_min_closid) - 1;
+
+ /* CLOSID 0 is always reserved for the default group */
+ closid_free_map &= ~1;
+}
+
+int closid_alloc(void)
+{
+ u32 closid = ffs(closid_free_map);
+
+ if (closid == 0)
+ return -ENOSPC;
+ closid--;
+ closid_free_map &= ~(1 << closid);
+
+ return closid;
+}
+
+void closid_free(int closid)
+{
+ closid_free_map |= 1 << closid;
+}
+
+static void clear_closid_rmid(int cpu)
+{
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+}
+
+static int mpam_online_cpu(unsigned int cpu)
+{
+ pr_info("online cpu\n");
+ return 0;
+}
+
+static int mpam_offline_cpu(unsigned int cpu)
+{
+ pr_info("offline cpu\n");
+ return 0;
+}
+
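+/* Placeholder probing: MPAM capability detection is not implemented yet. */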
+static __init bool get_rdt_alloc_resources(void)
+{
+ bool ret = false;
+
+ return ret;
+}
+
+static __init bool get_rdt_mon_resources(void)
+{
+ bool ret = false;
+
+ return ret;
+}
+
+static __init bool get_resctrl_resources(void)
+{
+ rdt_alloc_capable = get_rdt_alloc_resources();
+ rdt_mon_capable = get_rdt_mon_resources();
+
+ return (rdt_mon_capable || rdt_alloc_capable);
+}
+
+void post_resctrl_mount(void)
+{
+ if (rdt_alloc_capable)
+ static_branch_enable_cpuslocked(&resctrl_alloc_enable_key);
+ if (rdt_mon_capable)
+ static_branch_enable_cpuslocked(&resctrl_mon_enable_key);
+
+ if (rdt_alloc_capable || rdt_mon_capable)
+ static_branch_enable_cpuslocked(&resctrl_enable_key);
+}
+
+static int reset_all_ctrls(struct resctrl_resource *r)
+{
+ pr_info("%s\n", __func__);
+ return 0;
+}
+
+void resctrl_resource_reset(void)
+{
+ struct resctrl_resource *r;
+
+ /* Put everything back to default values. */
+ for_each_resctrl_resource(r) {
+ if (r->alloc_enabled)
+ reset_all_ctrls(r);
+ }
+}
+
+void release_rdtgroupfs_options(void)
+{
+}
+
+int parse_rdtgroupfs_options(char *data)
+{
+ int ret = 0;
+
+ pr_err("Invalid mount option\n");
+
+ return ret;
+}
+
+
+/*
+ * This is safe against mpam_sched_in() called from __switch_to()
+ * because __switch_to() is executed with interrupts disabled. A local call
+ * from update_closid_rmid() is protected against __switch_to() because
+ * preemption is disabled.
+ */
+void update_cpu_closid_rmid(void *info)
+{
+ struct rdtgroup *r = info;
+
+ if (r) {
+ this_cpu_write(pqr_state.default_closid, r->closid);
+ this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
+ }
+
+ /*
+ * We cannot unconditionally write the MSR because the current
+ * executing task might have its own closid selected. Just reuse
+ * the context switch code.
+ */
+ mpam_sched_in();
+}
+
+/*
+ * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
+ *
+ * Per task closids/rmids must have been set up before calling this function.
+ */
+void
+update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
+{
+ int cpu = get_cpu();
+
+ if (cpumask_test_cpu(cpu, cpu_mask))
+ update_cpu_closid_rmid(r);
+ smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
+ put_cpu();
+}
+
+struct task_move_callback {
+ struct callback_head work;
+ struct rdtgroup *rdtgrp;
+};
+
+static void move_myself(struct callback_head *head)
+{
+ struct task_move_callback *callback;
+ struct rdtgroup *rdtgrp;
+
+ callback = container_of(head, struct task_move_callback, work);
+ rdtgrp = callback->rdtgrp;
+
+ /*
+ * If resource group was deleted before this task work callback
+ * was invoked, then assign the task to root group and free the
+ * resource group.
+ */
+ if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+ (rdtgrp->flags & RDT_DELETED)) {
+ current->closid = 0;
+ current->rmid = 0;
+ kfree(rdtgrp);
+ }
+
+ preempt_disable();
+ /* update PQR_ASSOC MSR to make resource group go into effect */
+ mpam_sched_in();
+ preempt_enable();
+
+ kfree(callback);
+}
+
+int __resctrl_group_move_task(struct task_struct *tsk,
+ struct rdtgroup *rdtgrp)
+{
+ struct task_move_callback *callback;
+ int ret;
+
+ callback = kzalloc(sizeof(*callback), GFP_KERNEL);
+ if (!callback)
+ return -ENOMEM;
+ callback->work.func = move_myself;
+ callback->rdtgrp = rdtgrp;
+
+ /*
+ * Take a refcount, so rdtgrp cannot be freed before the
+ * callback has been invoked.
+ */
+ atomic_inc(&rdtgrp->waitcount);
+ ret = task_work_add(tsk, &callback->work, true);
+ if (ret) {
+ /*
+ * Task is exiting. Drop the refcount and free the callback.
+ * No need to check the refcount as the group cannot be
+ * deleted before the write function unlocks resctrl_group_mutex.
+ */
+ atomic_dec(&rdtgrp->waitcount);
+ kfree(callback);
+ rdt_last_cmd_puts("task exited\n");
+ } else {
+ /*
+ * For ctrl_mon groups move both closid and rmid.
+ * For monitor groups, can move the tasks only from
+ * their parent CTRL group.
+ */
+ if (rdtgrp->type == RDTCTRL_GROUP) {
+ tsk->closid = rdtgrp->closid;
+ tsk->rmid = rdtgrp->mon.rmid;
+ } else if (rdtgrp->type == RDTMON_GROUP) {
+ if (rdtgrp->mon.parent->closid == tsk->closid) {
+ tsk->rmid = rdtgrp->mon.rmid;
+ } else {
+ rdt_last_cmd_puts("Can't move task to different control group\n");
+ ret = -EINVAL;
+ }
+ }
+ }
+ return ret;
+}
+
+static int resctrl_group_seqfile_show(struct seq_file *m, void *arg)
+{
+ struct kernfs_open_file *of = m->private;
+ struct rftype *rft = of->kn->priv;
+
+ if (rft->seq_show)
+ return rft->seq_show(of, m, arg);
+ return 0;
+}
+
+static ssize_t resctrl_group_file_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct rftype *rft = of->kn->priv;
+
+ if (rft->write)
+ return rft->write(of, buf, nbytes, off);
+
+ return -EINVAL;
+}
+
+struct kernfs_ops resctrl_group_kf_single_ops = {
+ .atomic_write_len = PAGE_SIZE,
+ .write = resctrl_group_file_write,
+ .seq_show = resctrl_group_seqfile_show,
+};
+
+static bool is_cpu_list(struct kernfs_open_file *of)
+{
+ struct rftype *rft = of->kn->priv;
+
+ return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
+}
+
+static int resctrl_group_cpus_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = resctrl_group_kn_lock_live(of->kn);
+
+ if (rdtgrp) {
+ seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
+ cpumask_pr_args(&rdtgrp->cpu_mask));
+ } else {
+ ret = -ENOENT;
+ }
+ resctrl_group_kn_unlock(of->kn);
+
+ return ret;
+}
+
+int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+ cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
+{
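+ /* Placeholder: assigning CPUs to a control group is not implemented yet. */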
+ return 0;
+}
+
+int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+ cpumask_var_t tmpmask)
+{
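+ /* Placeholder: assigning CPUs to a monitor group is not implemented yet. */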
+ return 0;
+}
+
+static ssize_t resctrl_group_cpus_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ cpumask_var_t tmpmask, newmask, tmpmask1;
+ struct rdtgroup *rdtgrp;
+ int ret;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+ if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ return -ENOMEM;
+ }
+ if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+ return -ENOMEM;
+ }
+
+ rdtgrp = resctrl_group_kn_lock_live(of->kn);
+ rdt_last_cmd_clear();
+ if (!rdtgrp) {
+ ret = -ENOENT;
+ rdt_last_cmd_puts("directory was removed\n");
+ goto unlock;
+ }
+
+ if (is_cpu_list(of))
+ ret = cpulist_parse(buf, newmask);
+ else
+ ret = cpumask_parse(buf, newmask);
+
+ if (ret) {
+ rdt_last_cmd_puts("bad cpu list/mask\n");
+ goto unlock;
+ }
+
+ /* check that user didn't specify any offline cpus */
+ cpumask_andnot(tmpmask, newmask, cpu_online_mask);
+ if (cpumask_weight(tmpmask)) {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("can only assign online cpus\n");
+ goto unlock;
+ }
+
+ if (rdtgrp->type == RDTCTRL_GROUP)
+ ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
+ else if (rdtgrp->type == RDTMON_GROUP)
+ ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
+ else
+ ret = -EINVAL;
+
+unlock:
+ resctrl_group_kn_unlock(of->kn);
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+ free_cpumask_var(tmpmask1);
+
+ return ret ?: nbytes;
+}
+
+
+static int resctrl_group_task_write_permission(struct task_struct *task,
+ struct kernfs_open_file *of)
+{
+ const struct cred *tcred = get_task_cred(task);
+ const struct cred *cred = current_cred();
+ int ret = 0;
+
+ /*
+ * Even if we're attaching all tasks in the thread group, we only
+ * need to check permissions on one of them.
+ */
+ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+ !uid_eq(cred->euid, tcred->uid) &&
+ !uid_eq(cred->euid, tcred->suid)) {
+ rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
+ ret = -EPERM;
+ }
+
+ put_cred(tcred);
+ return ret;
+}
+
+static int resctrl_group_move_task(pid_t pid, struct rdtgroup *rdtgrp,
+ struct kernfs_open_file *of)
+{
+ struct task_struct *tsk;
+ int ret;
+
+ rcu_read_lock();
+ if (pid) {
+ tsk = find_task_by_vpid(pid);
+ if (!tsk) {
+ rcu_read_unlock();
+ rdt_last_cmd_printf("No task %d\n", pid);
+ return -ESRCH;
+ }
+ } else {
+ tsk = current;
+ }
+
+ get_task_struct(tsk);
+ rcu_read_unlock();
+
+ ret = resctrl_group_task_write_permission(tsk, of);
+ if (!ret)
+ ret = __resctrl_group_move_task(tsk, rdtgrp);
+
+ put_task_struct(tsk);
+ return ret;
+}
+
+static struct seq_buf last_cmd_status;
+static char last_cmd_status_buf[512];
+
+void rdt_last_cmd_clear(void)
+{
+ lockdep_assert_held(&resctrl_group_mutex);
+ seq_buf_clear(&last_cmd_status);
+}
+
+void rdt_last_cmd_puts(const char *s)
+{
+ lockdep_assert_held(&resctrl_group_mutex);
+ seq_buf_puts(&last_cmd_status, s);
+}
+
+void rdt_last_cmd_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ lockdep_assert_held(&resctrl_group_mutex);
+ seq_buf_vprintf(&last_cmd_status, fmt, ap);
+ va_end(ap);
+}
+
+static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ int len;
+
+ mutex_lock(&resctrl_group_mutex);
+ len = seq_buf_used(&last_cmd_status);
+ if (len)
+ seq_printf(seq, "%.*s", len, last_cmd_status_buf);
+ else
+ seq_puts(seq, "ok\n");
+ mutex_unlock(&resctrl_group_mutex);
+ return 0;
+}
+
+static ssize_t resctrl_group_tasks_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+ pid_t pid;
+
+ if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
+ return -EINVAL;
+ rdtgrp = resctrl_group_kn_lock_live(of->kn);
+ rdt_last_cmd_clear();
+
+ if (rdtgrp)
+ ret = resctrl_group_move_task(pid, rdtgrp, of);
+ else
+ ret = -ENOENT;
+
+ resctrl_group_kn_unlock(of->kn);
+
+ return ret ?: nbytes;
+}
+
+static void show_resctrl_tasks(struct rdtgroup *r, struct seq_file *s)
+{
+ struct task_struct *p, *t;
+
+ rcu_read_lock();
+ for_each_process_thread(p, t) {
+ if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
+ (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
+ seq_printf(s, "%d\n", t->pid);
+ }
+ rcu_read_unlock();
+}
+
+static int resctrl_group_tasks_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = resctrl_group_kn_lock_live(of->kn);
+ if (rdtgrp)
+ show_resctrl_tasks(rdtgrp, s);
+ else
+ ret = -ENOENT;
+ resctrl_group_kn_unlock(of->kn);
+
+ return ret;
+}
+
+/* rdtgroup information files for one cache resource. */
+static struct rftype res_specific_files[] = {
+ {
+ .name = "last_cmd_status",
+ .mode = 0444,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .seq_show = rdt_last_cmd_status_show,
+ .fflags = RF_TOP_INFO,
+ },
+ {
+ .name = "cpus",
+ .mode = 0644,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .write = resctrl_group_cpus_write,
+ .seq_show = resctrl_group_cpus_show,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "cpus_list",
+ .mode = 0644,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .write = resctrl_group_cpus_write,
+ .seq_show = resctrl_group_cpus_show,
+ .flags = RFTYPE_FLAGS_CPUS_LIST,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "tasks",
+ .mode = 0644,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .write = resctrl_group_tasks_write,
+ .seq_show = resctrl_group_tasks_show,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "schemata",
+ .mode = 0644,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .write = resctrl_group_schemata_write,
+ .seq_show = resctrl_group_schemata_show,
+ .fflags = RF_CTRL_BASE,
+ },
+};
+
+static int __init mpam_late_init(void)
+{
+ struct resctrl_resource *r;
+ int state, ret;
+
+ if (!get_resctrl_resources())
+ return -ENODEV;
+
+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "arm64/mpam:online:",
+ mpam_online_cpu, mpam_offline_cpu);
+ if (state < 0)
+ return state;
+
+ register_resctrl_specific_files(res_specific_files, ARRAY_SIZE(res_specific_files));
+
+ seq_buf_init(&last_cmd_status, last_cmd_status_buf,
+ sizeof(last_cmd_status_buf));
+
+ ret = resctrl_group_init();
+ if (ret) {
+ cpuhp_remove_state(state);
+ return ret;
+ }
+
+ for_each_resctrl_resource(r) {
+ if (r->alloc_capable)
+ pr_info("MPAM %s allocation detected\n", r->name);
+ }
+
+ for_each_resctrl_resource(r) {
+ if (r->mon_capable)
+ pr_info("MPAM %s monitoring detected\n", r->name);
+ }
+
+ return 0;
+}
+
+late_initcall(mpam_late_init);
diff --git a/arch/arm64/kernel/mpam_ctrlmon.c b/arch/arm64/kernel/mpam_ctrlmon.c
new file mode 100644
index 000000000000..b5ef7e2163db
--- /dev/null
+++ b/arch/arm64/kernel/mpam_ctrlmon.c
@@ -0,0 +1,45 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Cache Allocation code.
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Authors:
+ * Fenghua Yu <fenghua.yu(a)intel.com>
+ * Tony Luck <tony.luck(a)intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT can be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernfs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/resctrlfs.h>
+
+#include <asm/resctrl.h>
+
+
+ssize_t resctrl_group_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
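+ /* Placeholder: schemata parsing is not implemented yet. */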
+ return 0;
+}
+
+int resctrl_group_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ seq_puts(s, "resctrl_group_schemata_show\n");
+ return 0;
+}
diff --git a/arch/arm64/kernel/mpam_mon.c b/arch/arm64/kernel/mpam_mon.c
new file mode 100644
index 000000000000..f4eacf14d0b3
--- /dev/null
+++ b/arch/arm64/kernel/mpam_mon.c
@@ -0,0 +1,51 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Monitoring code
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * Author:
+ * Vikas Shivappa <vikas.shivappa(a)intel.com>
+ *
+ * This replaces the cqm.c based on perf but we reuse a lot of
+ * code and datastructures originally from Peter Zijlstra and Matt Fleming.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT can be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/resctrlfs.h>
+
+#include <asm/resctrl.h>
+
+/*
+ * Global boolean for rdt_monitor which is true if any
+ * resource monitoring is enabled.
+ */
+bool rdt_mon_capable;
+
+/*
+ * As of now the RMIDs allocation is global.
+ * However we keep track of which packages the RMIDs
+ * are used to optimize the limbo list management.
+ */
+int alloc_rmid(void)
+{
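+ /* Placeholder: RMID management is not implemented yet; always hand out RMID 0. */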
+ return 0;
+}
+
+void free_rmid(u32 rmid)
+{
+}
+
diff --git a/fs/Kconfig b/fs/Kconfig
index e00606f51bf4..453eece9cbc1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -318,3 +318,12 @@ config IO_WQ
bool
endmenu
+
+config RESCTRL_AARCH64
+ bool "Support Memory Partitioning and Monitoring"
+ default n
+ depends on MPAM
+ select KERNFS
+ help
+ Say Y to enable Memory Partitioning and Monitoring (MPAM), more
+ precisely Memory system performance resource Partitioning and
+ Monitoring.
diff --git a/fs/resctrlfs.c b/fs/resctrlfs.c
index 3016dea8c9b7..f2676e4ba817 100644
--- a/fs/resctrlfs.c
+++ b/fs/resctrlfs.c
@@ -186,19 +186,23 @@ static int resctrl_group_create_info_dir(struct kernfs_node *parent_kn)
if (ret)
goto out_destroy;
- for_each_alloc_enabled_resctrl_resource(r) {
- fflags = r->fflags | RF_CTRL_INFO;
- ret = resctrl_group_mkdir_info_resdir(r, r->name, fflags);
- if (ret)
- goto out_destroy;
+ for_each_resctrl_resource(r) {
+ if (r->alloc_enabled) {
+ fflags = r->fflags | RF_CTRL_INFO;
+ ret = resctrl_group_mkdir_info_resdir(r, r->name, fflags);
+ if (ret)
+ goto out_destroy;
+ }
}
- for_each_mon_enabled_resctrl_resource(r) {
- fflags = r->fflags | RF_MON_INFO;
- sprintf(name, "%s_MON", r->name);
- ret = resctrl_group_mkdir_info_resdir(r, name, fflags);
- if (ret)
- goto out_destroy;
+ for_each_resctrl_resource(r) {
+ if (r->mon_enabled) {
+ fflags = r->fflags | RF_MON_INFO;
+ sprintf(name, "%s_MON", r->name);
+ ret = resctrl_group_mkdir_info_resdir(r, name, fflags);
+ if (ret)
+ goto out_destroy;
+ }
}
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cf41ea0fd552..22bec25942cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1022,7 +1022,7 @@ struct task_struct {
/* cg_list protected by css_set_lock and tsk->alloc_lock: */
struct list_head cg_list;
#endif
-#ifdef CONFIG_RESCTRL
+#if defined CONFIG_RESCTRL || defined CONFIG_RESCTRL_AARCH64
u32 closid;
u32 rmid;
#endif
--
2.31.0