From: Yang Yingliang <yangyingliang(a)huawei.com>
commit 1abcabe9dab59eca2890c65ca81af0354c13bcd3 openEuler-1.0
hulk inclusion
category: feature
bugzilla: 5510
CVE: NA
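
Add an initial arm64 MPAM (Memory system performance resource
Partitioning and Monitoring) resctrl skeleton behind CONFIG_MPAM and
CONFIG_RESCTRL_AARCH64:

 - resource descriptions for the L2 and L3 caches, including the
   CODE/DATA variants used for CDP
 - a trivial global CLOSID allocator and RMID allocation stubs
 - the basic resctrl group files: cpus, cpus_list, tasks, schemata
   and info/last_cmd_status
 - cpu hotplug callbacks that currently only log the event

Most handlers are stubs; the actual MPAM hardware programming is left
to follow-up patches. Assuming the filesystem is mounted at the usual
resctrl mount point (as on x86), the files added here should appear as:

  /sys/fs/resctrl/info/last_cmd_status
  /sys/fs/resctrl/<group>/cpus
  /sys/fs/resctrl/<group>/cpus_list
  /sys/fs/resctrl/<group>/tasks
  /sys/fs/resctrl/<group>/schemata
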
Signed-off-by: Xie XiuQi <xiexiuqi(a)huawei.com>
Reviewed-by: Hanjun Guo <guohanjun(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Signed-off-by: Xin Hao <haoxing990(a)gmail.com>
---
arch/arm64/Kconfig | 8 +
arch/arm64/include/asm/mpam.h | 137 ++++++
arch/arm64/include/asm/mpam_sched.h | 33 ++
arch/arm64/include/asm/resctrl.h | 56 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/mpam.c | 712 ++++++++++++++++++++++++++++
arch/arm64/kernel/mpam_ctrlmon.c | 45 ++
arch/arm64/kernel/mpam_mon.c | 51 ++
fs/Kconfig | 9 +
fs/resctrlfs.c | 26 +-
include/linux/sched.h | 2 +-
11 files changed, 1068 insertions(+), 12 deletions(-)
create mode 100644 arch/arm64/include/asm/mpam.h
create mode 100644 arch/arm64/include/asm/mpam_sched.h
create mode 100644 arch/arm64/include/asm/resctrl.h
create mode 100644 arch/arm64/kernel/mpam.c
create mode 100644 arch/arm64/kernel/mpam_ctrlmon.c
create mode 100644 arch/arm64/kernel/mpam_mon.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8f77da2fa0e9..f7a8d2743b61 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -792,6 +792,14 @@ config HOTPLUG_CPU
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
+config MPAM
+ bool "Support Memory Partitioning and Monitoring"
+ default n
+ select RESCTRL_AARCH64
+ help
+ Say Y to enable Memory Partitioning and Monitoring (MPAM), more
+ precisely Memory system performance resource Partitioning and
+ Monitoring.
+
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h
new file mode 100644
index 000000000000..dd86be45ccc2
--- /dev/null
+++ b/arch/arm64/include/asm/mpam.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_MPAM_H
+#define _ASM_ARM64_MPAM_H
+
+#include <linux/sched.h>
+#include <linux/kernfs.h>
+#include <linux/jump_label.h>
+
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+
+DECLARE_STATIC_KEY_FALSE(resctrl_enable_key);
+DECLARE_STATIC_KEY_FALSE(resctrl_mon_enable_key);
+
+extern bool rdt_alloc_capable;
+extern bool rdt_mon_capable;
+
+enum rdt_group_type {
+ RDTCTRL_GROUP = 0,
+ RDTMON_GROUP,
+ RDT_NUM_GROUP,
+};
+
+/**
+ * struct mongroup - store mon group's data in resctrl fs.
+ * @mon_data_kn: kernfs node for the mon_data directory
+ * @parent: parent rdtgrp
+ * @crdtgrp_list: child rdtgroup node list
+ * @rmid: rmid for this rdtgroup
+ */
+struct mongroup {
+ struct kernfs_node *mon_data_kn;
+ struct rdtgroup *parent;
+ struct list_head crdtgrp_list;
+ u32 rmid;
+};
+
+/**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn: kernfs node
+ * @resctrl_group_list: linked list for all rdtgroups
+ * @closid: closid for this rdtgroup
+ * @cpu_mask: CPUs assigned to this rdtgroup
+ * @flags: status bits
+ * @waitcount: how many cpus expect to find this
+ * group when they acquire resctrl_group_mutex
+ * @type: indicates type of this rdtgroup - either
+ * monitor only or ctrl_mon group
+ * @mon: mongroup related data
+ */
+struct rdtgroup {
+ struct kernfs_node *kn;
+ struct list_head resctrl_group_list;
+ u32 closid;
+ struct cpumask cpu_mask;
+ int flags;
+ atomic_t waitcount;
+ enum rdt_group_type type;
+ struct mongroup mon;
+};
+
+/* rdtgroup.flags */
+#define RDT_DELETED 1
+
+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list: all instances of this resource
+ * @id: unique id for this instance
+ * @cpu_mask: which cpus share this resource
+ */
+struct rdt_domain {
+ struct list_head list;
+ int id;
+ struct cpumask cpu_mask;
+};
+
+extern struct mutex resctrl_group_mutex;
+
+extern struct resctrl_resource resctrl_resources_all[];
+
+int __init resctrl_group_init(void);
+
+enum {
+ MPAM_RESOURCE_L3,
+ MPAM_RESOURCE_L3DATA,
+ MPAM_RESOURCE_L3CODE,
+ MPAM_RESOURCE_L2,
+ MPAM_RESOURCE_L2DATA,
+ MPAM_RESOURCE_L2CODE,
+
+ /* Must be the last */
+ MPAM_NUM_RESOURCES,
+};
+
+void rdt_last_cmd_clear(void);
+void rdt_last_cmd_puts(const char *s);
+void rdt_last_cmd_printf(const char *fmt, ...);
+
+int alloc_rmid(void);
+void free_rmid(u32 rmid);
+int resctrl_group_mondata_show(struct seq_file *m, void *arg);
+void rmdir_mondata_subdir_allrdtgrp(struct resctrl_resource *r,
+ unsigned int dom_id);
+void mkdir_mondata_subdir_allrdtgrp(struct resctrl_resource *r,
+ struct rdt_domain *d);
+
+void closid_init(void);
+int closid_alloc(void);
+void closid_free(int closid);
+
+int cdp_enable(int level, int data_type, int code_type);
+void resctrl_resource_reset(void);
+void release_rdtgroupfs_options(void);
+int parse_rdtgroupfs_options(char *data);
+
+static inline int __resctrl_group_show_options(struct seq_file *seq)
+{
+ if (resctrl_resources_all[MPAM_RESOURCE_L3DATA].alloc_enabled)
+ seq_puts(seq, ",cdp");
+ return 0;
+}
+
+void post_resctrl_mount(void);
+#endif /* _ASM_ARM64_MPAM_H */
diff --git a/arch/arm64/include/asm/mpam_sched.h b/arch/arm64/include/asm/mpam_sched.h
new file mode 100644
index 000000000000..5978e98212b9
--- /dev/null
+++ b/arch/arm64/include/asm/mpam_sched.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_ARM64_MPAM_SCHED_H
+#define _ASM_ARM64_MPAM_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/jump_label.h>
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @cur_rmid: The cached Resource Monitoring ID
+ * @cur_closid: The cached Class Of Service ID
+ * @default_rmid: The user assigned Resource Monitoring ID
+ * @default_closid: The user assigned cached Class Of Service ID
+ *
+ * The upper 32 bits of IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them. This also
+ * stores the user configured per cpu CLOSID and RMID.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+ u32 cur_rmid;
+ u32 cur_closid;
+ u32 default_rmid;
+ u32 default_closid;
+};
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
+
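+/* Stub: context-switch handling for MPAM is not implemented yet. */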
+static inline void mpam_sched_in(void) {}
+
+#endif
diff --git a/arch/arm64/include/asm/resctrl.h b/arch/arm64/include/asm/resctrl.h
new file mode 100644
index 000000000000..b8c983fb68ce
--- /dev/null
+++ b/arch/arm64/include/asm/resctrl.h
@@ -0,0 +1,56 @@
+#ifndef _ASM_ARM64_RESCTRL_H
+#define _ASM_ARM64_RESCTRL_H
+
+#include <asm/mpam_sched.h>
+#include <asm/mpam.h>
+
+#define resctrl_group rdtgroup
+#define resctrl_alloc_capable rdt_alloc_capable
+#define resctrl_mon_capable rdt_mon_capable
+
+static inline int alloc_mon_id(void)
+{
+ return alloc_rmid();
+}
+
+static inline void free_mon_id(u32 id)
+{
+ free_rmid(id);
+}
+
+static inline void resctrl_id_init(void)
+{
+ closid_init();
+}
+
+static inline int resctrl_id_alloc(void)
+{
+ return closid_alloc();
+}
+
+static inline void resctrl_id_free(int id)
+{
+ closid_free(id);
+}
+
+void update_cpu_closid_rmid(void *info);
+void update_closid_rmid(const struct cpumask *cpu_mask, struct resctrl_group *r);
+int __resctrl_group_move_task(struct task_struct *tsk,
+ struct resctrl_group *rdtgrp);
+
+ssize_t resctrl_group_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+
+int resctrl_group_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v);
+
+#define release_resctrl_group_fs_options release_rdtgroupfs_options
+#define parse_resctrl_group_fs_options parse_rdtgroupfs_options
+
+#define for_each_resctrl_resource(r) \
+ for (r = resctrl_resources_all; \
+ r < resctrl_resources_all + MPAM_NUM_RESOURCES; \
+ r++)
+
+#endif /* _ASM_ARM64_RESCTRL_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 973cc777e7e4..c6b1ed993891 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -58,6 +58,7 @@ arm64-obj-$(CONFIG_CRASH_CORE) += crash_core.o
arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o
arm64-obj-$(CONFIG_SDEI_WATCHDOG) += watchdog_sdei.o
+arm64-obj-$(CONFIG_MPAM) += mpam.o mpam_ctrlmon.o mpam_mon.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c
new file mode 100644
index 000000000000..d56203bcfb68
--- /dev/null
+++ b/arch/arm64/kernel/mpam.c
@@ -0,0 +1,712 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Cache Allocation code.
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Authors:
+ * Fenghua Yu <fenghua.yu(a)intel.com>
+ * Tony Luck <tony.luck(a)intel.com>
+ * Vikas Shivappa <vikas.shivappa(a)intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT can be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/cacheinfo.h>
+#include <linux/cpuhotplug.h>
+#include <linux/task_work.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/resctrlfs.h>
+
+#include <asm/mpam_sched.h>
+#include <asm/resctrl.h>
+
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(resctrl_group_mutex);
+
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Functions which modify the state
+ * are called with interrupts disabled and no preemption, which
+ * is sufficient for the protection.
+ */
+DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+/*
+ * Used to store the max resource name width and max resource data width
+ * to display the schemata in a tabular format
+ */
+int max_name_width, max_data_width;
+
+/*
+ * Global boolean for rdt_alloc which is true if any
+ * resource allocation is enabled.
+ */
+bool rdt_alloc_capable;
+
+#define domain_init(id) LIST_HEAD_INIT(resctrl_resources_all[id].domains)
+
+struct resctrl_resource resctrl_resources_all[] = {
+ [MPAM_RESOURCE_L3] = {
+ .rid = MPAM_RESOURCE_L3,
+ .name = "L3",
+ .domains = domain_init(MPAM_RESOURCE_L3),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L3DATA] = {
+ .rid = MPAM_RESOURCE_L3DATA,
+ .name = "L3DATA",
+ .domains = domain_init(MPAM_RESOURCE_L3DATA),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L3CODE] = {
+ .rid = MPAM_RESOURCE_L3CODE,
+ .name = "L3CODE",
+ .domains = domain_init(MPAM_RESOURCE_L3CODE),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L2] = {
+ .rid = MPAM_RESOURCE_L2,
+ .name = "L2",
+ .domains = domain_init(MPAM_RESOURCE_L2),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L2DATA] = {
+ .rid = MPAM_RESOURCE_L2DATA,
+ .name = "L2DATA",
+ .domains = domain_init(MPAM_RESOURCE_L2DATA),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+ [MPAM_RESOURCE_L2CODE] = {
+ .rid = MPAM_RESOURCE_L2CODE,
+ .name = "L2CODE",
+ .domains = domain_init(MPAM_RESOURCE_L2CODE),
+ .fflags = RFTYPE_RES_CACHE,
+ },
+};
+
+static void rdt_get_cache_alloc_cfg(int idx, struct resctrl_resource *r)
+{
+ r->alloc_capable = true;
+ r->alloc_enabled = true;
+}
+
+/*
+ * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
+ * we can keep a bitmap of free CLOSIDs in a single integer.
+ *
+ * Using a global CLOSID across all resources has some advantages and
+ * some drawbacks:
+ * + We can simply set "current->closid" to assign a task to a resource
+ * group.
+ * + Context switch code can avoid extra memory references deciding which
+ * CLOSID to load into the PQR_ASSOC MSR
+ * - We give up some options in configuring resource groups across multi-socket
+ * systems.
+ * - Our choices on how to configure each resource become progressively more
+ * limited as the number of resources grows.
+ */
+static int closid_free_map;
+
+void closid_init(void)
+{
+ int resctrl_min_closid = 32;
+
+ closid_free_map = BIT_MASK(resctrl_min_closid) - 1;
+
+ /* CLOSID 0 is always reserved for the default group */
+ closid_free_map &= ~1;
+}
+
+int closid_alloc(void)
+{
+ u32 closid = ffs(closid_free_map);
+
+ if (closid == 0)
+ return -ENOSPC;
+ closid--;
+ closid_free_map &= ~(1 << closid);
+
+ return closid;
+}
+
+void closid_free(int closid)
+{
+ closid_free_map |= 1 << closid;
+}
+
+static void clear_closid_rmid(int cpu)
+{
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+}
+
+static int mpam_online_cpu(unsigned int cpu)
+{
+ pr_info("online cpu\n");
+ return 0;
+}
+
+static int mpam_offline_cpu(unsigned int cpu)
+{
+ pr_info("offline cpu\n");
+ return 0;
+}
+
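+/* Placeholder probing: MPAM capability detection is not implemented yet. */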
+static __init bool get_rdt_alloc_resources(void)
+{
+ bool ret = false;
+
+ return ret;
+}
+
+static __init bool get_rdt_mon_resources(void)
+{
+ bool ret = false;
+
+ return ret;
+}
+
+static __init bool get_resctrl_resources(void)
+{
+ rdt_alloc_capable = get_rdt_alloc_resources();
+ rdt_mon_capable = get_rdt_mon_resources();
+
+ return (rdt_mon_capable || rdt_alloc_capable);
+}
+
+void post_resctrl_mount(void)
+{
+ if (rdt_alloc_capable)
+ static_branch_enable_cpuslocked(&resctrl_alloc_enable_key);
+ if (rdt_mon_capable)
+ static_branch_enable_cpuslocked(&resctrl_mon_enable_key);
+
+ if (rdt_alloc_capable || rdt_mon_capable)
+ static_branch_enable_cpuslocked(&resctrl_enable_key);
+}
+
+static int reset_all_ctrls(struct resctrl_resource *r)
+{
+ pr_info("%s\n", __func__);
+ return 0;
+}
+
+void resctrl_resource_reset(void)
+{
+ struct resctrl_resource *r;
+
+ /* Put everything back to default values. */
+ for_each_resctrl_resource(r) {
+ if (r->alloc_enabled)
+ reset_all_ctrls(r);
+ }
+}
+
+void release_rdtgroupfs_options(void)
+{
+}
+
+int parse_rdtgroupfs_options(char *data)
+{
+ int ret = 0;
+
+ pr_err("Invalid mount option\n");
+
+ return ret;
+}
+
+
+/*
+ * This is safe against mpam_sched_in() called from __switch_to()
+ * because __switch_to() is executed with interrupts disabled. A local call
+ * from update_closid_rmid() is protected against __switch_to() because
+ * preemption is disabled.
+ */
+void update_cpu_closid_rmid(void *info)
+{
+ struct rdtgroup *r = info;
+
+ if (r) {
+ this_cpu_write(pqr_state.default_closid, r->closid);
+ this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
+ }
+
+ /*
+ * We cannot unconditionally write the MSR because the current
+ * executing task might have its own closid selected. Just reuse
+ * the context switch code.
+ */
+ mpam_sched_in();
+}
+
+/*
+ * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
+ *
+ * Per task closids/rmids must have been set up before calling this function.
+ */
+void
+update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
+{
+ int cpu = get_cpu();
+
+ if (cpumask_test_cpu(cpu, cpu_mask))
+ update_cpu_closid_rmid(r);
+ smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
+ put_cpu();
+}
+
+struct task_move_callback {
+ struct callback_head work;
+ struct rdtgroup *rdtgrp;
+};
+
+static void move_myself(struct callback_head *head)
+{
+ struct task_move_callback *callback;
+ struct rdtgroup *rdtgrp;
+
+ callback = container_of(head, struct task_move_callback, work);
+ rdtgrp = callback->rdtgrp;
+
+ /*
+ * If resource group was deleted before this task work callback
+ * was invoked, then assign the task to root group and free the
+ * resource group.
+ */
+ if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+ (rdtgrp->flags & RDT_DELETED)) {
+ current->closid = 0;
+ current->rmid = 0;
+ kfree(rdtgrp);
+ }
+
+ preempt_disable();
+ /* update PQR_ASSOC MSR to make resource group go into effect */
+ mpam_sched_in();
+ preempt_enable();
+
+ kfree(callback);
+}
+
+int __resctrl_group_move_task(struct task_struct *tsk,
+ struct rdtgroup *rdtgrp)
+{
+ struct task_move_callback *callback;
+ int ret;
+
+ callback = kzalloc(sizeof(*callback), GFP_KERNEL);
+ if (!callback)
+ return -ENOMEM;
+ callback->work.func = move_myself;
+ callback->rdtgrp = rdtgrp;
+
+ /*
+ * Take a refcount, so rdtgrp cannot be freed before the
+ * callback has been invoked.
+ */
+ atomic_inc(&rdtgrp->waitcount);
+ ret = task_work_add(tsk, &callback->work, true);
+ if (ret) {
+ /*
+ * Task is exiting. Drop the refcount and free the callback.
+ * No need to check the refcount as the group cannot be
+ * deleted before the write function unlocks resctrl_group_mutex.
+ */
+ atomic_dec(&rdtgrp->waitcount);
+ kfree(callback);
+ rdt_last_cmd_puts("task exited\n");
+ } else {
+ /*
+ * For ctrl_mon groups move both closid and rmid.
+ * For monitor groups, can move the tasks only from
+ * their parent CTRL group.
+ */
+ if (rdtgrp->type == RDTCTRL_GROUP) {
+ tsk->closid = rdtgrp->closid;
+ tsk->rmid = rdtgrp->mon.rmid;
+ } else if (rdtgrp->type == RDTMON_GROUP) {
+ if (rdtgrp->mon.parent->closid == tsk->closid) {
+ tsk->rmid = rdtgrp->mon.rmid;
+ } else {
+ rdt_last_cmd_puts("Can't move task to different control group\n");
+ ret = -EINVAL;
+ }
+ }
+ }
+ return ret;
+}
+
+static int resctrl_group_seqfile_show(struct seq_file *m, void *arg)
+{
+ struct kernfs_open_file *of = m->private;
+ struct rftype *rft = of->kn->priv;
+
+ if (rft->seq_show)
+ return rft->seq_show(of, m, arg);
+ return 0;
+}
+
+static ssize_t resctrl_group_file_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct rftype *rft = of->kn->priv;
+
+ if (rft->write)
+ return rft->write(of, buf, nbytes, off);
+
+ return -EINVAL;
+}
+
+struct kernfs_ops resctrl_group_kf_single_ops = {
+ .atomic_write_len = PAGE_SIZE,
+ .write = resctrl_group_file_write,
+ .seq_show = resctrl_group_seqfile_show,
+};
+
+static bool is_cpu_list(struct kernfs_open_file *of)
+{
+ struct rftype *rft = of->kn->priv;
+
+ return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
+}
+
+static int resctrl_group_cpus_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = resctrl_group_kn_lock_live(of->kn);
+
+ if (rdtgrp) {
+ seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
+ cpumask_pr_args(&rdtgrp->cpu_mask));
+ } else {
+ ret = -ENOENT;
+ }
+ resctrl_group_kn_unlock(of->kn);
+
+ return ret;
+}
+
+int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+ cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
+{
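+ /* Placeholder: assigning CPUs to a control group is not implemented yet. */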
+ return 0;
+}
+
+int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+ cpumask_var_t tmpmask)
+{
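+ /* Placeholder: assigning CPUs to a monitor group is not implemented yet. */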
+ return 0;
+}
+
+static ssize_t resctrl_group_cpus_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ cpumask_var_t tmpmask, newmask, tmpmask1;
+ struct rdtgroup *rdtgrp;
+ int ret;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+ if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ return -ENOMEM;
+ }
+ if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+ return -ENOMEM;
+ }
+
+ rdtgrp = resctrl_group_kn_lock_live(of->kn);
+ rdt_last_cmd_clear();
+ if (!rdtgrp) {
+ ret = -ENOENT;
+ rdt_last_cmd_puts("directory was removed\n");
+ goto unlock;
+ }
+
+ if (is_cpu_list(of))
+ ret = cpulist_parse(buf, newmask);
+ else
+ ret = cpumask_parse(buf, newmask);
+
+ if (ret) {
+ rdt_last_cmd_puts("bad cpu list/mask\n");
+ goto unlock;
+ }
+
+ /* check that user didn't specify any offline cpus */
+ cpumask_andnot(tmpmask, newmask, cpu_online_mask);
+ if (cpumask_weight(tmpmask)) {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("can only assign online cpus\n");
+ goto unlock;
+ }
+
+ if (rdtgrp->type == RDTCTRL_GROUP)
+ ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
+ else if (rdtgrp->type == RDTMON_GROUP)
+ ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
+ else
+ ret = -EINVAL;
+
+unlock:
+ resctrl_group_kn_unlock(of->kn);
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+ free_cpumask_var(tmpmask1);
+
+ return ret ?: nbytes;
+}
+
+
+static int resctrl_group_task_write_permission(struct task_struct *task,
+ struct kernfs_open_file *of)
+{
+ const struct cred *tcred = get_task_cred(task);
+ const struct cred *cred = current_cred();
+ int ret = 0;
+
+ /*
+ * Even if we're attaching all tasks in the thread group, we only
+ * need to check permissions on one of them.
+ */
+ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+ !uid_eq(cred->euid, tcred->uid) &&
+ !uid_eq(cred->euid, tcred->suid)) {
+ rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
+ ret = -EPERM;
+ }
+
+ put_cred(tcred);
+ return ret;
+}
+
+static int resctrl_group_move_task(pid_t pid, struct rdtgroup *rdtgrp,
+ struct kernfs_open_file *of)
+{
+ struct task_struct *tsk;
+ int ret;
+
+ rcu_read_lock();
+ if (pid) {
+ tsk = find_task_by_vpid(pid);
+ if (!tsk) {
+ rcu_read_unlock();
+ rdt_last_cmd_printf("No task %d\n", pid);
+ return -ESRCH;
+ }
+ } else {
+ tsk = current;
+ }
+
+ get_task_struct(tsk);
+ rcu_read_unlock();
+
+ ret = resctrl_group_task_write_permission(tsk, of);
+ if (!ret)
+ ret = __resctrl_group_move_task(tsk, rdtgrp);
+
+ put_task_struct(tsk);
+ return ret;
+}
+
+static struct seq_buf last_cmd_status;
+static char last_cmd_status_buf[512];
+
+void rdt_last_cmd_clear(void)
+{
+ lockdep_assert_held(&resctrl_group_mutex);
+ seq_buf_clear(&last_cmd_status);
+}
+
+void rdt_last_cmd_puts(const char *s)
+{
+ lockdep_assert_held(&resctrl_group_mutex);
+ seq_buf_puts(&last_cmd_status, s);
+}
+
+void rdt_last_cmd_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ lockdep_assert_held(&resctrl_group_mutex);
+ seq_buf_vprintf(&last_cmd_status, fmt, ap);
+ va_end(ap);
+}
+
+static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ int len;
+
+ mutex_lock(&resctrl_group_mutex);
+ len = seq_buf_used(&last_cmd_status);
+ if (len)
+ seq_printf(seq, "%.*s", len, last_cmd_status_buf);
+ else
+ seq_puts(seq, "ok\n");
+ mutex_unlock(&resctrl_group_mutex);
+ return 0;
+}
+
+static ssize_t resctrl_group_tasks_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+ pid_t pid;
+
+ if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
+ return -EINVAL;
+ rdtgrp = resctrl_group_kn_lock_live(of->kn);
+ rdt_last_cmd_clear();
+
+ if (rdtgrp)
+ ret = resctrl_group_move_task(pid, rdtgrp, of);
+ else
+ ret = -ENOENT;
+
+ resctrl_group_kn_unlock(of->kn);
+
+ return ret ?: nbytes;
+}
+
+static void show_resctrl_tasks(struct rdtgroup *r, struct seq_file *s)
+{
+ struct task_struct *p, *t;
+
+ rcu_read_lock();
+ for_each_process_thread(p, t) {
+ if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
+ (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
+ seq_printf(s, "%d\n", t->pid);
+ }
+ rcu_read_unlock();
+}
+
+static int resctrl_group_tasks_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = resctrl_group_kn_lock_live(of->kn);
+ if (rdtgrp)
+ show_resctrl_tasks(rdtgrp, s);
+ else
+ ret = -ENOENT;
+ resctrl_group_kn_unlock(of->kn);
+
+ return ret;
+}
+
+/* rdtgroup information files for one cache resource. */
+static struct rftype res_specific_files[] = {
+ {
+ .name = "last_cmd_status",
+ .mode = 0444,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .seq_show = rdt_last_cmd_status_show,
+ .fflags = RF_TOP_INFO,
+ },
+ {
+ .name = "cpus",
+ .mode = 0644,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .write = resctrl_group_cpus_write,
+ .seq_show = resctrl_group_cpus_show,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "cpus_list",
+ .mode = 0644,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .write = resctrl_group_cpus_write,
+ .seq_show = resctrl_group_cpus_show,
+ .flags = RFTYPE_FLAGS_CPUS_LIST,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "tasks",
+ .mode = 0644,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .write = resctrl_group_tasks_write,
+ .seq_show = resctrl_group_tasks_show,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "schemata",
+ .mode = 0644,
+ .kf_ops = &resctrl_group_kf_single_ops,
+ .write = resctrl_group_schemata_write,
+ .seq_show = resctrl_group_schemata_show,
+ .fflags = RF_CTRL_BASE,
+ },
+};
+
+static int __init mpam_late_init(void)
+{
+ struct resctrl_resource *r;
+ int state, ret;
+
+ if (!get_resctrl_resources())
+ return -ENODEV;
+
+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "arm64/mpam:online:",
+ mpam_online_cpu, mpam_offline_cpu);
+ if (state < 0)
+ return state;
+
+ register_resctrl_specific_files(res_specific_files, ARRAY_SIZE(res_specific_files));
+
+ seq_buf_init(&last_cmd_status, last_cmd_status_buf,
+ sizeof(last_cmd_status_buf));
+
+ ret = resctrl_group_init();
+ if (ret) {
+ cpuhp_remove_state(state);
+ return ret;
+ }
+
+ for_each_resctrl_resource(r) {
+ if (r->alloc_capable)
+ pr_info("MPAM %s allocation detected\n", r->name);
+ }
+
+ for_each_resctrl_resource(r) {
+ if (r->mon_capable)
+ pr_info("MPAM %s monitoring detected\n", r->name);
+ }
+
+ return 0;
+}
+
+late_initcall(mpam_late_init);
diff --git a/arch/arm64/kernel/mpam_ctrlmon.c b/arch/arm64/kernel/mpam_ctrlmon.c
new file mode 100644
index 000000000000..b5ef7e2163db
--- /dev/null
+++ b/arch/arm64/kernel/mpam_ctrlmon.c
@@ -0,0 +1,45 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Cache Allocation code.
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Authors:
+ * Fenghua Yu <fenghua.yu(a)intel.com>
+ * Tony Luck <tony.luck(a)intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT can be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernfs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/resctrlfs.h>
+
+#include <asm/resctrl.h>
+
+
+ssize_t resctrl_group_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
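+ /* Placeholder: schemata parsing is not implemented yet. */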
+ return 0;
+}
+
+int resctrl_group_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ seq_puts(s, "resctrl_group_schemata_show\n");
+ return 0;
+}
diff --git a/arch/arm64/kernel/mpam_mon.c b/arch/arm64/kernel/mpam_mon.c
new file mode 100644
index 000000000000..f4eacf14d0b3
--- /dev/null
+++ b/arch/arm64/kernel/mpam_mon.c
@@ -0,0 +1,51 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Monitoring code
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * Author:
+ * Vikas Shivappa <vikas.shivappa(a)intel.com>
+ *
+ * This replaces the cqm.c based on perf but we reuse a lot of
+ * code and datastructures originally from Peter Zijlstra and Matt Fleming.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT can be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/resctrlfs.h>
+
+#include <asm/resctrl.h>
+
+/*
+ * Global boolean for rdt_monitor which is true if any
+ * resource monitoring is enabled.
+ */
+bool rdt_mon_capable;
+
+/*
+ * As of now the RMIDs allocation is global.
+ * However we keep track of which packages the RMIDs
+ * are used to optimize the limbo list management.
+ */
+int alloc_rmid(void)
+{
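+ /* Placeholder: RMID management is not implemented yet; always hand out RMID 0. */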
+ return 0;
+}
+
+void free_rmid(u32 rmid)
+{
+}
+
diff --git a/fs/Kconfig b/fs/Kconfig
index e00606f51bf4..453eece9cbc1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -318,3 +318,12 @@ config IO_WQ
bool
endmenu
+
+config RESCTRL_AARCH64
+ bool "Support Memory Partitioning and Monitoring"
+ default n
+ depends on MPAM
+ select KERNFS
+ help
+ Say Y to enable Memory Partitioning and Monitoring (MPAM), more
+ precisely Memory system performance resource Partitioning and
+ Monitoring.
diff --git a/fs/resctrlfs.c b/fs/resctrlfs.c
index 3016dea8c9b7..f2676e4ba817 100644
--- a/fs/resctrlfs.c
+++ b/fs/resctrlfs.c
@@ -186,19 +186,23 @@ static int resctrl_group_create_info_dir(struct kernfs_node *parent_kn)
if (ret)
goto out_destroy;
- for_each_alloc_enabled_resctrl_resource(r) {
- fflags = r->fflags | RF_CTRL_INFO;
- ret = resctrl_group_mkdir_info_resdir(r, r->name, fflags);
- if (ret)
- goto out_destroy;
+ for_each_resctrl_resource(r) {
+ if (r->alloc_enabled) {
+ fflags = r->fflags | RF_CTRL_INFO;
+ ret = resctrl_group_mkdir_info_resdir(r, r->name, fflags);
+ if (ret)
+ goto out_destroy;
+ }
}
- for_each_mon_enabled_resctrl_resource(r) {
- fflags = r->fflags | RF_MON_INFO;
- sprintf(name, "%s_MON", r->name);
- ret = resctrl_group_mkdir_info_resdir(r, name, fflags);
- if (ret)
- goto out_destroy;
+ for_each_resctrl_resource(r) {
+ if (r->mon_enabled) {
+ fflags = r->fflags | RF_MON_INFO;
+ sprintf(name, "%s_MON", r->name);
+ ret = resctrl_group_mkdir_info_resdir(r, name, fflags);
+ if (ret)
+ goto out_destroy;
+ }
}
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cf41ea0fd552..22bec25942cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1022,7 +1022,7 @@ struct task_struct {
/* cg_list protected by css_set_lock and tsk->alloc_lock: */
struct list_head cg_list;
#endif
-#ifdef CONFIG_RESCTRL
+#if defined CONFIG_RESCTRL || defined CONFIG_RESCTRL_AARCH64
u32 closid;
u32 rmid;
#endif
--
2.31.0