-rw-r--r--Documentation/admin-guide/kernel-parameters.txt2
-rw-r--r--Documentation/filesystems/resctrl.rst325
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/resctrl.h16
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c81
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h56
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c248
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--fs/resctrl/ctrlmondata.c26
-rw-r--r--fs/resctrl/internal.h58
-rw-r--r--fs/resctrl/monitor.c1008
-rw-r--r--fs/resctrl/rdtgroup.c259
-rw-r--r--include/linux/resctrl.h148
-rw-r--r--include/linux/resctrl_types.h18
16 files changed, 2021 insertions, 229 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6607dbb1be44..bf1e6ca6aeb8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6163,7 +6163,7 @@
rdt= [HW,X86,RDT]
Turn on/off individual RDT features. List is:
cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
- mba, smba, bmec.
+ mba, smba, bmec, abmc.
E.g. to turn on cmt and turn off mba use:
rdt=cmt,!mba
diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst
index c7949dd44f2f..006d23af66e1 100644
--- a/Documentation/filesystems/resctrl.rst
+++ b/Documentation/filesystems/resctrl.rst
@@ -26,6 +26,7 @@ MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local"
MBA (Memory Bandwidth Allocation) "mba"
SMBA (Slow Memory Bandwidth Allocation) ""
BMEC (Bandwidth Monitoring Event Configuration) ""
+ABMC (Assignable Bandwidth Monitoring Counters) ""
=============================================== ================================
Historically, new features were made visible by default in /proc/cpuinfo. This
@@ -256,6 +257,144 @@ with the following files:
# cat /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config
0=0x30;1=0x30;3=0x15;4=0x15
+"mbm_assign_mode":
+ The supported counter assignment modes. Brackets enclose the mode that is
+ enabled. The MBM events associated with counters may reset when
+ "mbm_assign_mode" is changed.
+ ::
+
+ # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode
+ [mbm_event]
+ default
+
+ "mbm_event":
+
+ mbm_event mode allows users to assign a hardware counter to an RMID, event
+ pair and monitor the bandwidth usage as long as it is assigned. The hardware
+ continues to count through the assigned counter until the user explicitly
+ unassigns it. Each event within a resctrl group can be assigned independently.
+
+ In this mode, a monitoring event can only accumulate data while it is backed
+ by a hardware counter. Use "mbm_L3_assignments" found in each CTRL_MON and MON
+ group to specify which of the events should have a counter assigned. The total
+ number of counters per domain is described in the "num_mbm_cntrs" file. Changing
+ the mode may cause all counters on the resource to reset.
+
+ Moving to mbm_event counter assignment mode requires users to assign the counters
+ to the events. Otherwise, the MBM event counters will return 'Unassigned' when read.
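+
+ For example, reading an MBM event that has no counter assigned:
+ ::
+
+ # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_total_bytes
+ Unassigned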
+
+ The mode is beneficial for AMD platforms that support more CTRL_MON
+ and MON groups than available hardware counters. By default, this
+ feature is enabled on AMD platforms with the ABMC (Assignable Bandwidth
+ Monitoring Counters) capability, ensuring counters remain assigned even
+ when the corresponding RMID is not actively used by any processor.
+
+ "default":
+
+ In default mode, resctrl assumes there is a hardware counter for each
+ event within every CTRL_MON and MON group. On AMD platforms, hardware may
+ re-allocate counters between reads, resetting MBM events and producing
+ misleading values, or reporting "Unavailable" when no counter backs the
+ event. Using mbm_event mode, if supported, is recommended to prevent this.
+
+ * To enable "mbm_event" counter assignment mode:
+ ::
+
+ # echo "mbm_event" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode
+
+ * To enable "default" monitoring mode:
+ ::
+
+ # echo "default" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode
+
+"num_mbm_cntrs":
+ The maximum number of counters (total of available and assigned counters) in
+ each domain when the system supports mbm_event mode.
+
+ For example, on a system with a maximum of 32 memory bandwidth monitoring
+ counters in each of its L3 domains:
+ ::
+
+ # cat /sys/fs/resctrl/info/L3_MON/num_mbm_cntrs
+ 0=32;1=32
+
+"available_mbm_cntrs":
+ The number of counters available for assignment in each domain when mbm_event
+ mode is enabled on the system.
+
+ For example, on a system with 30 assignable hardware counters available
+ in each of its L3 domains:
+ ::
+
+ # cat /sys/fs/resctrl/info/L3_MON/available_mbm_cntrs
+ 0=30;1=30
+
+"event_configs":
+ Directory that exists when "mbm_event" counter assignment mode is supported.
+ Contains a sub-directory for each MBM event that can be assigned to a counter.
+
+ Two MBM events are supported by default: mbm_local_bytes and mbm_total_bytes.
+ Each MBM event's sub-directory contains a file named "event_filter" that is
+ used to view and modify the memory transactions counted by the MBM event.
+ The file is accessible only when "mbm_event" counter assignment mode is
+ enabled.
+
+ List of memory transaction types supported:
+
+ ========================== ========================================================
+ Name Description
+ ========================== ========================================================
+ dirty_victim_writes_all Dirty Victims from the QOS domain to all types of memory
+ remote_reads_slow_memory Reads to slow memory in the non-local NUMA domain
+ local_reads_slow_memory Reads to slow memory in the local NUMA domain
+ remote_non_temporal_writes Non-temporal writes to non-local NUMA domain
+ local_non_temporal_writes Non-temporal writes to local NUMA domain
+ remote_reads Reads to memory in the non-local NUMA domain
+ local_reads Reads to memory in the local NUMA domain
+ ========================== ========================================================
+
+ For example::
+
+ # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter
+ local_reads,remote_reads,local_non_temporal_writes,remote_non_temporal_writes,
+ local_reads_slow_memory,remote_reads_slow_memory,dirty_victim_writes_all
+
+ # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter
+ local_reads,local_non_temporal_writes,local_reads_slow_memory
+
+ Modify an event's configuration by writing to its "event_filter" file within
+ the "event_configs" directory. The read/write "event_filter" file reflects
+ which memory transactions the event counts.
+
+ For example::
+
+ # echo "local_reads, local_non_temporal_writes" >
+ /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter
+
+ # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter
+ local_reads,local_non_temporal_writes
+
+"mbm_assign_on_mkdir":
+ Exists when "mbm_event" counter assignment mode is supported. Accessible
+ only when "mbm_event" counter assignment mode is enabled.
+
+ Determines if a counter will automatically be assigned to an RMID, MBM event
+ pair when its associated monitor group is created via mkdir. It is enabled by
+ default at boot and whenever the mode is switched from "default" to "mbm_event"
+ counter assignment mode. Users can disable this capability by writing to the
+ interface.
+
+ "0":
+ Auto assignment is disabled.
+ "1":
+ Auto assignment is enabled.
+
+ Example::
+
+ # echo 0 > /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir
+ # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir
+ 0
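+
+ With auto assignment disabled, a monitor group created afterwards starts with
+ no counters assigned (assuming two L3 domains, as in the examples above):
+ ::
+
+ # mkdir /sys/fs/resctrl/mon_groups/m1
+ # cat /sys/fs/resctrl/mon_groups/m1/mbm_L3_assignments
+ mbm_total_bytes:0=_;1=_
+ mbm_local_bytes:0=_;1=_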
+
"max_threshold_occupancy":
Read/write file provides the largest value (in
bytes) at which a previously used LLC_occupancy
@@ -380,10 +519,77 @@ When monitoring is enabled all MON groups will also contain:
for the L3 cache they occupy). These are named "mon_sub_L3_YY"
where "YY" is the node number.
+ When the 'mbm_event' counter assignment mode is enabled, reading
+ an MBM event of a MON group returns 'Unassigned' if no hardware
+ counter is assigned to it. For CTRL_MON groups, 'Unassigned' is
+ returned if the MBM event has no assigned counter in either the
+ CTRL_MON group or any of its associated MON groups.
+
"mon_hw_id":
Available only with debug option. The identifier used by hardware
for the monitor group. On x86 this is the RMID.
+When monitoring is enabled all CTRL_MON and MON groups may also contain:
+
+"mbm_L3_assignments":
+ Exists when "mbm_event" counter assignment mode is supported and lists the
+ counter assignment states of the group.
+
+ The assignment list is displayed in the following format:
+
+ <Event>:<Domain ID>=<Assignment state>;<Domain ID>=<Assignment state>
+
+ Event: A valid MBM event in the
+ /sys/fs/resctrl/info/L3_MON/event_configs directory.
+
+ Domain ID: A valid domain ID. When writing, '*' applies the changes
+ to all the domains.
+
+ Assignment states:
+
+ _ : No counter assigned.
+
+ e : Counter assigned exclusively.
+
+ Example:
+
+ To display the counter assignment states for the default group.
+ ::
+
+ # cd /sys/fs/resctrl
+ # cat /sys/fs/resctrl/mbm_L3_assignments
+ mbm_total_bytes:0=e;1=e
+ mbm_local_bytes:0=e;1=e
+
+ Assignments can be modified by writing to the interface.
+
+ Examples:
+
+ To unassign the counter associated with the mbm_total_bytes event on domain 0:
+ ::
+
+ # echo "mbm_total_bytes:0=_" > /sys/fs/resctrl/mbm_L3_assignments
+ # cat /sys/fs/resctrl/mbm_L3_assignments
+ mbm_total_bytes:0=_;1=e
+ mbm_local_bytes:0=e;1=e
+
+ To unassign the counter associated with the mbm_total_bytes event on all the domains:
+ ::
+
+ # echo "mbm_total_bytes:*=_" > /sys/fs/resctrl/mbm_L3_assignments
+ # cat /sys/fs/resctrl/mbm_L3_assignments
+ mbm_total_bytes:0=_;1=_
+ mbm_local_bytes:0=e;1=e
+
+ To assign a counter associated with the mbm_total_bytes event on all domains in
+ exclusive mode:
+ ::
+
+ # echo "mbm_total_bytes:*=e" > /sys/fs/resctrl/mbm_L3_assignments
+ # cat /sys/fs/resctrl/mbm_L3_assignments
+ mbm_total_bytes:0=e;1=e
+ mbm_local_bytes:0=e;1=e
+
When the "mba_MBps" mount option is used all CTRL_MON groups will also contain:
"mba_MBps_event":
@@ -1429,6 +1635,125 @@ View the llc occupancy snapshot::
# cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
11234000
+
+Examples on working with mbm_assign_mode
+========================================
+
+a. Check if MBM counter assignment mode is supported.
+::
+
+ # mount -t resctrl resctrl /sys/fs/resctrl/
+
+ # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode
+ [mbm_event]
+ default
+
+The "mbm_event" mode is detected and enabled.
+
+b. Check how many assignable counters are supported.
+::
+
+ # cat /sys/fs/resctrl/info/L3_MON/num_mbm_cntrs
+ 0=32;1=32
+
+c. Check how many counters are available for assignment in each domain.
+::
+
+ # cat /sys/fs/resctrl/info/L3_MON/available_mbm_cntrs
+ 0=30;1=30
+
+d. To list the default group's assignment states.
+::
+
+ # cat /sys/fs/resctrl/mbm_L3_assignments
+ mbm_total_bytes:0=e;1=e
+ mbm_local_bytes:0=e;1=e
+
+e. To unassign the counter associated with the mbm_total_bytes event on domain 0.
+::
+
+ # echo "mbm_total_bytes:0=_" > /sys/fs/resctrl/mbm_L3_assignments
+ # cat /sys/fs/resctrl/mbm_L3_assignments
+ mbm_total_bytes:0=_;1=e
+ mbm_local_bytes:0=e;1=e
+
+f. To unassign the counter associated with the mbm_total_bytes event on all domains.
+::
+
+ # echo "mbm_total_bytes:*=_" > /sys/fs/resctrl/mbm_L3_assignments
+ # cat /sys/fs/resctrl/mbm_L3_assignments
+ mbm_total_bytes:0=_;1=_
+ mbm_local_bytes:0=e;1=e
+
+g. To assign a counter associated with the mbm_total_bytes event on all domains in
+exclusive mode.
+::
+
+ # echo "mbm_total_bytes:*=e" > /sys/fs/resctrl/mbm_L3_assignments
+ # cat /sys/fs/resctrl/mbm_L3_assignments
+ mbm_total_bytes:0=e;1=e
+ mbm_local_bytes:0=e;1=e
+
+h. Read the mbm_total_bytes and mbm_local_bytes events of the default group. Events
+are read the same way whether or not counters are assigned.
+::
+
+ # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_total_bytes
+ 779247936
+ # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_total_bytes
+ 562324232
+ # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes
+ 212122123
+ # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes
+ 121212144
+
+i. Check the event configurations.
+::
+
+ # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter
+ local_reads,remote_reads,local_non_temporal_writes,remote_non_temporal_writes,
+ local_reads_slow_memory,remote_reads_slow_memory,dirty_victim_writes_all
+
+ # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter
+ local_reads,local_non_temporal_writes,local_reads_slow_memory
+
+j. Change the event configuration for mbm_local_bytes.
+::
+
+ # echo "local_reads, local_non_temporal_writes, local_reads_slow_memory, remote_reads" >
+ /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter
+
+ # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter
+ local_reads,local_non_temporal_writes,local_reads_slow_memory,remote_reads
+
+k. Now read the local events again. The first read may return "Unavailable".
+A subsequent read of mbm_local_bytes will display the current value.
+::
+
+ # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes
+ Unavailable
+ # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes
+ 2252323
+ # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes
+ Unavailable
+ # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes
+ 1566565
+
+l. Users can switch back to 'default' mbm_assign_mode if required. Note that
+switching the mbm_assign_mode may reset all the MBM counters (and thus all MBM
+events) of all the resctrl groups.
+::
+
+ # echo "default" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode
+ # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode
+ mbm_event
+ [default]
+
+m. Unmount the resctrl filesystem.
+::
+
+ # umount /sys/fs/resctrl/
+
Intel RDT Errata
================
diff --git a/MAINTAINERS b/MAINTAINERS
index a80cca9aa6ff..62d16c20a888 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21186,6 +21186,7 @@ M: Tony Luck <tony.luck@intel.com>
M: Reinette Chatre <reinette.chatre@intel.com>
R: Dave Martin <Dave.Martin@arm.com>
R: James Morse <james.morse@arm.com>
+R: Babu Moger <babu.moger@amd.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: Documentation/filesystems/resctrl.rst
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 751ca35386b0..b2a562217d3f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -496,6 +496,7 @@
#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
+#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */
/*
* BUG word(s)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b60d3711a708..73393a66d3ab 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1230,6 +1230,8 @@
/* - AMD: */
#define MSR_IA32_MBA_BW_BASE 0xc0000200
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
+#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd
+#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
/* AMD-V MSRs */
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index feb93b50e990..575f8408a9e7 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -44,7 +44,6 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
-extern unsigned int rdt_mon_features;
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
@@ -84,21 +83,6 @@ static inline void resctrl_arch_disable_mon(void)
static_branch_dec_cpuslocked(&rdt_enable_key);
}
-static inline bool resctrl_arch_is_llc_occupancy_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
-}
-
-static inline bool resctrl_arch_is_mbm_total_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
-}
-
-static inline bool resctrl_arch_is_mbm_local_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
-}
-
/*
* __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
*
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 187d527ef73b..06ca5a30140c 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -107,7 +107,7 @@ u32 resctrl_arch_system_num_rmid_idx(void)
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
- return r->num_rmid;
+ return r->mon.num_rmid;
}
struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
@@ -365,8 +365,10 @@ static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
- kfree(hw_dom->arch_mbm_total);
- kfree(hw_dom->arch_mbm_local);
+ int idx;
+
+ for_each_mbm_idx(idx)
+ kfree(hw_dom->arch_mbm_states[idx]);
kfree(hw_dom);
}
@@ -400,25 +402,27 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *
*/
static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
- size_t tsize;
-
- if (resctrl_arch_is_mbm_total_enabled()) {
- tsize = sizeof(*hw_dom->arch_mbm_total);
- hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
- if (!hw_dom->arch_mbm_total)
- return -ENOMEM;
- }
- if (resctrl_arch_is_mbm_local_enabled()) {
- tsize = sizeof(*hw_dom->arch_mbm_local);
- hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
- if (!hw_dom->arch_mbm_local) {
- kfree(hw_dom->arch_mbm_total);
- hw_dom->arch_mbm_total = NULL;
- return -ENOMEM;
- }
+ size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]);
+ enum resctrl_event_id eventid;
+ int idx;
+
+ for_each_mbm_event_id(eventid) {
+ if (!resctrl_is_mon_event_enabled(eventid))
+ continue;
+ idx = MBM_STATE_IDX(eventid);
+ hw_dom->arch_mbm_states[idx] = kcalloc(num_rmid, tsize, GFP_KERNEL);
+ if (!hw_dom->arch_mbm_states[idx])
+ goto cleanup;
}
return 0;
+cleanup:
+ for_each_mbm_idx(idx) {
+ kfree(hw_dom->arch_mbm_states[idx]);
+ hw_dom->arch_mbm_states[idx] = NULL;
+ }
+
+ return -ENOMEM;
}
static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
@@ -516,6 +520,9 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
d = container_of(hdr, struct rdt_mon_domain, hdr);
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+ /* Update the mbm_assign_mode state for the CPU if supported */
+ if (r->mon.mbm_cntr_assignable)
+ resctrl_arch_mbm_cntr_assign_set_one(r);
return;
}
@@ -535,9 +542,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
d->ci_id = ci->id;
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+ /* Update the mbm_assign_mode state for the CPU if supported */
+ if (r->mon.mbm_cntr_assignable)
+ resctrl_arch_mbm_cntr_assign_set_one(r);
+
arch_mon_domain_online(r, d);
- if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
+ if (arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) {
mon_domain_free(hw_dom);
return;
}
@@ -707,6 +718,7 @@ enum {
RDT_FLAG_MBA,
RDT_FLAG_SMBA,
RDT_FLAG_BMEC,
+ RDT_FLAG_ABMC,
};
#define RDT_OPT(idx, n, f) \
@@ -732,6 +744,7 @@ static struct rdt_options rdt_options[] __ro_after_init = {
RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA),
RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA),
RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC),
+ RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC),
};
#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
@@ -863,15 +876,24 @@ static __init bool get_rdt_alloc_resources(void)
static __init bool get_rdt_mon_resources(void)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ bool ret = false;
- if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
- rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
- if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
- rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
- if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
- rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
+ if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) {
+ resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID);
+ ret = true;
+ }
+ if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
+ resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID);
+ ret = true;
+ }
+ if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
+ resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID);
+ ret = true;
+ }
+ if (rdt_cpu_has(X86_FEATURE_ABMC))
+ ret = true;
- if (!rdt_mon_features)
+ if (!ret)
return false;
return !rdt_get_mon_l3_config(r);
@@ -965,7 +987,7 @@ static enum cpuhp_state rdt_online;
/* Runs once on the BSP during boot. */
void resctrl_cpu_detect(struct cpuinfo_x86 *c)
{
- if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC) && !cpu_has(c, X86_FEATURE_ABMC)) {
c->x86_cache_max_rmid = -1;
c->x86_cache_occ_scale = -1;
c->x86_cache_mbm_width_offset = -1;
@@ -977,7 +999,8 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
- cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL) ||
+ cpu_has(c, X86_FEATURE_ABMC)) {
u32 eax, ebx, ecx, edx;
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5e3c41b36437..9f4c2f0aaf5c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -37,6 +37,15 @@ struct arch_mbm_state {
u64 prev_msr;
};
+/* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
+#define ABMC_ENABLE_BIT 0
+
+/*
+ * QoS event identifiers. Both are set in QM_EVTSEL to read an ABMC counter.
+ */
+#define ABMC_EXTENDED_EVT_ID BIT(31)
+#define ABMC_EVT_ID BIT(0)
+
/**
* struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
* a resource for a control function
@@ -54,15 +63,15 @@ struct rdt_hw_ctrl_domain {
* struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share
* a resource for a monitor function
* @d_resctrl: Properties exposed to the resctrl file system
- * @arch_mbm_total: arch private state for MBM total bandwidth
- * @arch_mbm_local: arch private state for MBM local bandwidth
+ * @arch_mbm_states: Per-event pointer to the MBM event's saved state.
+ * An MBM event's state is an array of struct arch_mbm_state
+ * indexed by RMID on x86.
*
* Members of this structure are accessed via helpers that provide abstraction.
*/
struct rdt_hw_mon_domain {
struct rdt_mon_domain d_resctrl;
- struct arch_mbm_state *arch_mbm_total;
- struct arch_mbm_state *arch_mbm_local;
+ struct arch_mbm_state *arch_mbm_states[QOS_NUM_L3_MBM_EVENTS];
};
static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r)
@@ -102,6 +111,7 @@ struct msr_param {
* @mon_scale: cqm counter * mon_scale = occupancy in bytes
* @mbm_width: Monitor width, to detect and correct for overflow.
* @cdp_enabled: CDP state of this resource
+ * @mbm_cntr_assign_enabled: ABMC feature is enabled
*
* Members of this structure are either private to the architecture
* e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
@@ -115,6 +125,7 @@ struct rdt_hw_resource {
unsigned int mon_scale;
unsigned int mbm_width;
bool cdp_enabled;
+ bool mbm_cntr_assign_enabled;
};
static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
@@ -159,6 +170,42 @@ union cpuid_0x10_x_edx {
unsigned int full;
};
+/*
+ * ABMC counters are configured by writing to MSR_IA32_L3_QOS_ABMC_CFG.
+ *
+ * @bw_type : Event configuration that represents the memory
+ * transactions being tracked by the @cntr_id.
+ * @bw_src : Bandwidth source (RMID or CLOSID).
+ * @reserved1 : Reserved.
+ * @is_clos : @bw_src field is a CLOSID (not an RMID).
+ * @cntr_id : Counter identifier.
+ * @reserved : Reserved.
+ * @cntr_en : Counting enable bit.
+ * @cfg_en : Configuration enable bit.
+ *
+ * Configuration and counting:
+ * Counter can be configured across multiple writes to MSR. Configuration
+ * is applied only when @cfg_en = 1. Counter @cntr_id is reset when the
+ * configuration is applied.
+ * @cfg_en = 1, @cntr_en = 0 : Apply @cntr_id configuration but do not
+ * count events.
+ * @cfg_en = 1, @cntr_en = 1 : Apply @cntr_id configuration and start
+ * counting events.
+ */
+union l3_qos_abmc_cfg {
+ struct {
+ unsigned long bw_type :32,
+ bw_src :12,
+ reserved1 : 3,
+ is_clos : 1,
+ cntr_id : 5,
+ reserved : 9,
+ cntr_en : 1,
+ cfg_en : 1;
+ } split;
+ unsigned long full;
+};
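+
+/*
+ * Illustrative sketch (not part of this interface): compose a value for
+ * MSR_IA32_L3_QOS_ABMC_CFG that assigns counter 5 to RMID 3 and starts
+ * counting the transactions selected by @bw_type. The counter ID, RMID
+ * and function name are arbitrary examples.
+ */
+static inline u64 l3_qos_abmc_cfg_example(u32 bw_type)
+{
+	union l3_qos_abmc_cfg cfg = { 0 };
+
+	cfg.split.cfg_en = 1;		/* Apply configuration (resets counter 5). */
+	cfg.split.cntr_en = 1;		/* Start counting once configured. */
+	cfg.split.cntr_id = 5;		/* Hardware counter to program. */
+	cfg.split.bw_src = 3;		/* An RMID, since is_clos is left 0. */
+	cfg.split.bw_type = bw_type;	/* Memory transactions to track. */
+
+	return cfg.full;		/* Write with wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, ...). */
+}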
+
void rdt_ctrl_update(void *arg);
int rdt_get_mon_l3_config(struct rdt_resource *r);
@@ -168,5 +215,6 @@ bool rdt_cpu_has(int flag);
void __init intel_rdt_mbm_apply_quirk(void);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
+void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r);
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index c261558276cd..c8945610d455 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -31,11 +31,6 @@
*/
bool rdt_mon_capable;
-/*
- * Global to indicate which monitoring events are enabled.
- */
-unsigned int rdt_mon_features;
-
#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
static int snc_nodes_per_l3_cache = 1;
@@ -135,7 +130,7 @@ static int logical_rmid_to_physical_rmid(int cpu, int lrmid)
if (snc_nodes_per_l3_cache == 1)
return lrmid;
- return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid;
+ return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->mon.num_rmid;
}
static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
@@ -166,18 +161,14 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
u32 rmid,
enum resctrl_event_id eventid)
{
- switch (eventid) {
- case QOS_L3_OCCUP_EVENT_ID:
- return NULL;
- case QOS_L3_MBM_TOTAL_EVENT_ID:
- return &hw_dom->arch_mbm_total[rmid];
- case QOS_L3_MBM_LOCAL_EVENT_ID:
- return &hw_dom->arch_mbm_local[rmid];
- default:
- /* Never expect to get here */
- WARN_ON_ONCE(1);
+ struct arch_mbm_state *state;
+
+ if (!resctrl_is_mbm_event(eventid))
return NULL;
- }
+
+ state = hw_dom->arch_mbm_states[MBM_STATE_IDX(eventid)];
+
+ return state ? &state[rmid] : NULL;
}
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
@@ -206,14 +197,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
{
struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
-
- if (resctrl_arch_is_mbm_total_enabled())
- memset(hw_dom->arch_mbm_total, 0,
- sizeof(*hw_dom->arch_mbm_total) * r->num_rmid);
-
- if (resctrl_arch_is_mbm_local_enabled())
- memset(hw_dom->arch_mbm_local, 0,
- sizeof(*hw_dom->arch_mbm_local) * r->num_rmid);
+ enum resctrl_event_id eventid;
+ int idx;
+
+ for_each_mbm_event_id(eventid) {
+ if (!resctrl_is_mon_event_enabled(eventid))
+ continue;
+ idx = MBM_STATE_IDX(eventid);
+ memset(hw_dom->arch_mbm_states[idx], 0,
+ sizeof(*hw_dom->arch_mbm_states[0]) * r->mon.num_rmid);
+ }
}
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
@@ -224,15 +217,33 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
return chunks >> shift;
}
+static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 rmid, enum resctrl_event_id eventid, u64 msr_val)
+{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+ struct arch_mbm_state *am;
+ u64 chunks;
+
+ am = get_arch_mbm_state(hw_dom, rmid, eventid);
+ if (am) {
+ am->chunks += mbm_overflow_count(am->prev_msr, msr_val,
+ hw_res->mbm_width);
+ chunks = get_corrected_mbm_count(rmid, am->chunks);
+ am->prev_msr = msr_val;
+ } else {
+ chunks = msr_val;
+ }
+
+ return chunks * hw_res->mon_scale;
+}
+
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *ignored)
{
- struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
int cpu = cpumask_any(&d->hdr.cpu_mask);
- struct arch_mbm_state *am;
- u64 msr_val, chunks;
+ u64 msr_val;
u32 prmid;
int ret;
@@ -243,17 +254,76 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
if (ret)
return ret;
+ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
+
+ return 0;
+}
+
+static int __cntr_id_read(u32 cntr_id, u64 *val)
+{
+ u64 msr_val;
+
+ /*
+ * QM_EVTSEL Register definition:
+ * =======================================================
+ * Bits Mnemonic Description
+ * =======================================================
+ * 63:44 -- Reserved
+ * 43:32 RMID RMID or counter ID in ABMC mode
+ * when reading an MBM event
+ * 31 ExtendedEvtID Extended Event Identifier
+ * 30:8 -- Reserved
+ * 7:0 EvtID Event Identifier
+ * =======================================================
+ * The contents of a specific counter can be read by setting the
+ * following fields in QM_EVTSEL.ExtendedEvtID(=1) and
+ * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting QM_EVTSEL.RMID
+ * to the desired counter ID. Reading the QM_CTR then returns the
+ * contents of the specified counter. The RMID_VAL_ERROR bit is set
+ * if the counter configuration is invalid, or if an invalid counter
+ * ID is set in the QM_EVTSEL.RMID field. The RMID_VAL_UNAVAIL bit
+ * is set if the counter data is unavailable.
+ */
+ wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id);
+ rdmsrl(MSR_IA32_QM_CTR, msr_val);
+
+ if (msr_val & RMID_VAL_ERROR)
+ return -EIO;
+ if (msr_val & RMID_VAL_UNAVAIL)
+ return -EINVAL;
+
+ *val = msr_val;
+ return 0;
+}
+
+void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 unused, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid)
+{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ struct arch_mbm_state *am;
+
am = get_arch_mbm_state(hw_dom, rmid, eventid);
if (am) {
- am->chunks += mbm_overflow_count(am->prev_msr, msr_val,
- hw_res->mbm_width);
- chunks = get_corrected_mbm_count(rmid, am->chunks);
- am->prev_msr = msr_val;
- } else {
- chunks = msr_val;
+ memset(am, 0, sizeof(*am));
+
+ /* Record any initial, non-zero count value. */
+ __cntr_id_read(cntr_id, &am->prev_msr);
}
+}
+
+int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 unused, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid, u64 *val)
+{
+ u64 msr_val;
+ int ret;
+
+ ret = __cntr_id_read(cntr_id, &msr_val);
+ if (ret)
+ return ret;
- *val = chunks * hw_res->mon_scale;
+ *val = get_corrected_val(r, d, rmid, eventid, msr_val);
return 0;
}
@@ -346,12 +416,13 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
unsigned int threshold;
+ u32 eax, ebx, ecx, edx;
snc_nodes_per_l3_cache = snc_get_config();
resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache;
- r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
+ r->mon.num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
@@ -366,7 +437,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
*
* For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
*/
- threshold = resctrl_rmid_realloc_limit / r->num_rmid;
+ threshold = resctrl_rmid_realloc_limit / r->mon.num_rmid;
/*
* Because num_rmid may not be a power of two, round the value
@@ -375,12 +446,17 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
*/
resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold);
- if (rdt_cpu_has(X86_FEATURE_BMEC)) {
- u32 eax, ebx, ecx, edx;
-
+ if (rdt_cpu_has(X86_FEATURE_BMEC) || rdt_cpu_has(X86_FEATURE_ABMC)) {
/* Detect list of bandwidth sources that can be tracked */
cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
- r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
+ r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
+ }
+
+ if (rdt_cpu_has(X86_FEATURE_ABMC)) {
+ r->mon.mbm_cntr_assignable = true;
+ cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx);
+ r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1;
+ hw_res->mbm_cntr_assign_enabled = true;
}
r->mon_capable = true;
@@ -401,3 +477,91 @@ void __init intel_rdt_mbm_apply_quirk(void)
mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
mbm_cf = mbm_cf_table[cf_index].cf;
}
+
+static void resctrl_abmc_set_one_amd(void *arg)
+{
+ bool *enable = arg;
+
+ if (*enable)
+ msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT);
+ else
+ msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT);
+}
+
+/*
+ * ABMC enable/disable requires update of L3_QOS_EXT_CFG MSR on all the CPUs
+ * associated with all monitor domains.
+ */
+static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable)
+{
+ struct rdt_mon_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_abmc_set_one_amd,
+ &enable, 1);
+ resctrl_arch_reset_rmid_all(r, d);
+ }
+}
+
+int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
+{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+ if (r->mon.mbm_cntr_assignable &&
+ hw_res->mbm_cntr_assign_enabled != enable) {
+ _resctrl_abmc_enable(r, enable);
+ hw_res->mbm_cntr_assign_enabled = enable;
+ }
+
+ return 0;
+}
+
+bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
+{
+ return resctrl_to_arch_res(r)->mbm_cntr_assign_enabled;
+}
+
+static void resctrl_abmc_config_one_amd(void *info)
+{
+ union l3_qos_abmc_cfg *abmc_cfg = info;
+
+ wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, abmc_cfg->full);
+}
+
+/*
+ * Send an IPI to the domain to assign the counter to RMID, event pair.
+ */
+void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ enum resctrl_event_id evtid, u32 rmid, u32 closid,
+ u32 cntr_id, bool assign)
+{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ union l3_qos_abmc_cfg abmc_cfg = { 0 };
+ struct arch_mbm_state *am;
+
+ abmc_cfg.split.cfg_en = 1;
+ abmc_cfg.split.cntr_en = assign ? 1 : 0;
+ abmc_cfg.split.cntr_id = cntr_id;
+ abmc_cfg.split.bw_src = rmid;
+ if (assign)
+ abmc_cfg.split.bw_type = resctrl_get_mon_evt_cfg(evtid);
+
+ smp_call_function_any(&d->hdr.cpu_mask, resctrl_abmc_config_one_amd, &abmc_cfg, 1);
+
+ /*
+ * The hardware counter is reset (because cfg_en == 1) so there is no
+ * need to record initial non-zero counts.
+ */
+ am = get_arch_mbm_state(hw_dom, rmid, evtid);
+ if (am)
+ memset(am, 0, sizeof(*am));
+}
+
+void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r)
+{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+ resctrl_abmc_set_one_amd(&hw_res->mbm_cntr_assign_enabled);
+}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 6b868afb26c3..4cee6213d667 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -51,6 +51,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+ { X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 },
{ X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 },
{ X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 },
{ X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 },
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index 3c39cfacb251..0d0ef54fc4de 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -473,12 +473,12 @@ ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
rdt_last_cmd_clear();
if (!strcmp(buf, "mbm_local_bytes")) {
- if (resctrl_arch_is_mbm_local_enabled())
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
else
ret = -EINVAL;
} else if (!strcmp(buf, "mbm_total_bytes")) {
- if (resctrl_arch_is_mbm_total_enabled())
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
else
ret = -EINVAL;
@@ -563,10 +563,15 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
rr->r = r;
rr->d = d;
rr->first = first;
- rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
- if (IS_ERR(rr->arch_mon_ctx)) {
- rr->err = -EINVAL;
- return;
+ if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
+ resctrl_is_mbm_event(evtid)) {
+ rr->is_mbm_cntr = true;
+ } else {
+ rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid);
+ if (IS_ERR(rr->arch_mon_ctx)) {
+ rr->err = -EINVAL;
+ return;
+ }
}
cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
@@ -582,7 +587,8 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
else
smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
- resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
+ if (rr->arch_mon_ctx)
+ resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
}
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
@@ -653,10 +659,16 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
checkresult:
+ /*
+ * -ENOENT is a special case, set only when "mbm_event" counter assignment
+ * mode is enabled and no counter has been assigned.
+ */
if (rr.err == -EIO)
seq_puts(m, "Error\n");
else if (rr.err == -EINVAL)
seq_puts(m, "Unavailable\n");
+ else if (rr.err == -ENOENT)
+ seq_puts(m, "Unassigned\n");
else
seq_printf(m, "%llu\n", rr.val);
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 9a8cf6f11151..cf1fd82dc5a9 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -52,19 +52,31 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
}
/**
- * struct mon_evt - Entry in the event list of a resource
+ * struct mon_evt - Properties of a monitor event
* @evtid: event id
+ * @rid: resource id for this event
* @name: name of the event
+ * @evt_cfg: Event configuration value that represents the
+ * memory transactions (e.g., READS_TO_LOCAL_MEM,
+ * READS_TO_REMOTE_MEM) being tracked by @evtid.
+ * Only valid if @evtid is an MBM event.
* @configurable: true if the event is configurable
- * @list: entry in &rdt_resource->evt_list
+ * @enabled: true if the event is enabled
*/
struct mon_evt {
enum resctrl_event_id evtid;
+ enum resctrl_res_level rid;
char *name;
+ u32 evt_cfg;
bool configurable;
- struct list_head list;
+ bool enabled;
};
+extern struct mon_evt mon_event_all[QOS_NUM_EVENTS];
+
+#define for_each_mon_event(mevt) for (mevt = &mon_event_all[QOS_FIRST_EVENT]; \
+ mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++)
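+
+/*
+ * Illustrative sketch (not part of this interface): for_each_mon_event()
+ * visits every entry of mon_event_all[], including events the system does
+ * not support, so callers typically check mevt->enabled:
+ *
+ *	struct mon_evt *mevt;
+ *
+ *	for_each_mon_event(mevt) {
+ *		if (!mevt->enabled)
+ *			continue;
+ *		...	operate on a supported event ...
+ *	}
+ */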
+
/**
* struct mon_data - Monitoring details for each event file.
* @list: Member of the global @mon_data_kn_priv_list list.
@@ -99,6 +111,8 @@ struct mon_data {
* @evtid: Which monitor event to read.
* @first: Initialize MBM counter when true.
* @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @is_mbm_cntr: true if "mbm_event" counter assignment mode is enabled and it
+ * is an MBM event.
* @err: Error encountered when reading counter.
* @val: Returned value of event counter. If @rgrp is a parent resource group,
* @val includes the sum of event counts from its child resource groups.
@@ -113,6 +127,7 @@ struct rmid_read {
enum resctrl_event_id evtid;
bool first;
struct cacheinfo *ci;
+ bool is_mbm_cntr;
int err;
u64 val;
void *arch_mon_ctx;
@@ -226,6 +241,8 @@ struct rdtgroup {
#define RFTYPE_DEBUG BIT(10)
+#define RFTYPE_ASSIGN_CONFIG BIT(11)
+
#define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
#define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
@@ -375,6 +392,41 @@ bool closid_allocated(unsigned int closid);
int resctrl_find_cleanest_closid(void);
+void *rdt_kn_parent_priv(struct kernfs_node *kn);
+
+int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v);
+
+ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off);
+
+void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn,
+ bool show);
+
+int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v);
+
+int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s,
+ void *v);
+
+void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp);
+
+void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp);
+
+int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v);
+
+ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
+ loff_t off);
+
+int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v);
+
+ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off);
+
+int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v);
+
+ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
+ loff_t off);
+
#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index 7326c28a7908..4076336fbba6 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -336,7 +336,7 @@ void free_rmid(u32 closid, u32 rmid)
entry = __rmid_entry(idx);
- if (resctrl_arch_is_llc_occupancy_enabled())
+ if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
add_rmid_to_limbo(entry);
else
list_add_tail(&entry->list, &rmid_free_lru);
@@ -346,27 +346,97 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
u32 rmid, enum resctrl_event_id evtid)
{
u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ struct mbm_state *state;
- switch (evtid) {
- case QOS_L3_MBM_TOTAL_EVENT_ID:
- return &d->mbm_total[idx];
- case QOS_L3_MBM_LOCAL_EVENT_ID:
- return &d->mbm_local[idx];
- default:
+ if (!resctrl_is_mbm_event(evtid))
return NULL;
+
+ state = d->mbm_states[MBM_STATE_IDX(evtid)];
+
+ return state ? &state[idx] : NULL;
+}
+
+/*
+ * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp.
+ *
+ * Return:
+ * Valid counter ID on success, or -ENOENT on failure.
+ */
+static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d,
+ struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
+{
+ int cntr_id;
+
+ if (!r->mon.mbm_cntr_assignable)
+ return -ENOENT;
+
+ if (!resctrl_is_mbm_event(evtid))
+ return -ENOENT;
+
+ for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
+ if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
+ d->cntr_cfg[cntr_id].evtid == evtid)
+ return cntr_id;
+ }
+
+ return -ENOENT;
+}
+
+/*
+ * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d.
+ * Caller must ensure that the specified event is not assigned already.
+ *
+ * Return:
+ * Valid counter ID on success, or -ENOSPC on failure.
+ */
+static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d,
+ struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
+{
+ int cntr_id;
+
+ for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
+ if (!d->cntr_cfg[cntr_id].rdtgrp) {
+ d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
+ d->cntr_cfg[cntr_id].evtid = evtid;
+ return cntr_id;
+ }
}
+
+ return -ENOSPC;
}
-static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
+/*
+ * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d.
+ */
+static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id)
+{
+ memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg));
+}
+
+static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
{
int cpu = smp_processor_id();
+ u32 closid = rdtgrp->closid;
+ u32 rmid = rdtgrp->mon.rmid;
struct rdt_mon_domain *d;
+ int cntr_id = -ENOENT;
struct mbm_state *m;
int err, ret;
u64 tval = 0;
+ if (rr->is_mbm_cntr) {
+ cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);
+ if (cntr_id < 0) {
+ rr->err = -ENOENT;
+ return -EINVAL;
+ }
+ }
+
if (rr->first) {
- resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
+ if (rr->is_mbm_cntr)
+ resctrl_arch_reset_cntr(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
+ else
+ resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
if (m)
memset(m, 0, sizeof(struct mbm_state));
@@ -377,8 +447,12 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
/* Reading a single domain, must be on a CPU in that domain. */
if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
return -EINVAL;
- rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
- rr->evtid, &tval, rr->arch_mon_ctx);
+ if (rr->is_mbm_cntr)
+ rr->err = resctrl_arch_cntr_read(rr->r, rr->d, closid, rmid, cntr_id,
+ rr->evtid, &tval);
+ else
+ rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
if (rr->err)
return rr->err;
@@ -402,8 +476,12 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
if (d->ci_id != rr->ci->id)
continue;
- err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
- rr->evtid, &tval, rr->arch_mon_ctx);
+ if (rr->is_mbm_cntr)
+ err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id,
+ rr->evtid, &tval);
+ else
+ err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
+ rr->evtid, &tval, rr->arch_mon_ctx);
if (!err) {
rr->val += tval;
ret = 0;
@@ -419,8 +497,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
/*
* mbm_bw_count() - Update bw count from values previously read by
* __mon_event_count().
- * @closid: The closid used to identify the cached mbm_state.
- * @rmid: The rmid used to identify the cached mbm_state.
+ * @rdtgrp: resctrl group associated with the CLOSID and RMID to identify
+ * the cached mbm_state.
* @rr: The struct rmid_read populated by __mon_event_count().
*
* Supporting function to calculate the memory bandwidth
@@ -428,9 +506,11 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
* __mon_event_count() is compared with the chunks value from the previous
* invocation. This must be called once per second to maintain values in MBps.
*/
-static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
+static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
{
u64 cur_bw, bytes, cur_bytes;
+ u32 closid = rdtgrp->closid;
+ u32 rmid = rdtgrp->mon.rmid;
struct mbm_state *m;
m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
@@ -459,7 +539,7 @@ void mon_event_count(void *info)
rdtgrp = rr->rgrp;
- ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
+ ret = __mon_event_count(rdtgrp, rr);
/*
* For Ctrl groups read data from child monitor groups and
@@ -470,8 +550,7 @@ void mon_event_count(void *info)
if (rdtgrp->type == RDTCTRL_GROUP) {
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry->closid, entry->mon.rmid,
- rr) == 0)
+ if (__mon_event_count(entry, rr) == 0)
ret = 0;
}
}
@@ -602,44 +681,49 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
}
static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 closid, u32 rmid, enum resctrl_event_id evtid)
+ struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
{
struct rmid_read rr = {0};
rr.r = r;
rr.d = d;
rr.evtid = evtid;
- rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
- if (IS_ERR(rr.arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(rr.arch_mon_ctx));
- return;
+ if (resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ rr.is_mbm_cntr = true;
+ } else {
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (IS_ERR(rr.arch_mon_ctx)) {
+ pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+ PTR_ERR(rr.arch_mon_ctx));
+ return;
+ }
}
- __mon_event_count(closid, rmid, &rr);
+ __mon_event_count(rdtgrp, &rr);
/*
* If the software controller is enabled, compute the
* bandwidth for this event id.
*/
if (is_mba_sc(NULL))
- mbm_bw_count(closid, rmid, &rr);
+ mbm_bw_count(rdtgrp, &rr);
- resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
+ if (rr.arch_mon_ctx)
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 closid, u32 rmid)
+ struct rdtgroup *rdtgrp)
{
/*
* This is protected from concurrent reads from user as both
* the user and overflow handler hold the global mutex.
*/
- if (resctrl_arch_is_mbm_total_enabled())
- mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID);
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
+ mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
- if (resctrl_arch_is_mbm_local_enabled())
- mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID);
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
+ mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
}
/*
@@ -712,11 +796,11 @@ void mbm_handle_overflow(struct work_struct *work)
d = container_of(work, struct rdt_mon_domain, mbm_over.work);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
+ mbm_update(r, d, prgrp);
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
- mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
+ mbm_update(r, d, crgrp);
if (is_mba_sc(NULL))
update_mba_bw(prgrp, d);
@@ -842,38 +926,819 @@ out_unlock:
mutex_unlock(&rdtgroup_mutex);
}
-static struct mon_evt llc_occupancy_event = {
- .name = "llc_occupancy",
- .evtid = QOS_L3_OCCUP_EVENT_ID,
+/*
+ * All available events. Architecture code marks the ones that
+ * are supported by a system using resctrl_enable_mon_event()
+ * to set .enabled.
+ */
+struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
+ [QOS_L3_OCCUP_EVENT_ID] = {
+ .name = "llc_occupancy",
+ .evtid = QOS_L3_OCCUP_EVENT_ID,
+ .rid = RDT_RESOURCE_L3,
+ },
+ [QOS_L3_MBM_TOTAL_EVENT_ID] = {
+ .name = "mbm_total_bytes",
+ .evtid = QOS_L3_MBM_TOTAL_EVENT_ID,
+ .rid = RDT_RESOURCE_L3,
+ },
+ [QOS_L3_MBM_LOCAL_EVENT_ID] = {
+ .name = "mbm_local_bytes",
+ .evtid = QOS_L3_MBM_LOCAL_EVENT_ID,
+ .rid = RDT_RESOURCE_L3,
+ },
};
-static struct mon_evt mbm_total_event = {
- .name = "mbm_total_bytes",
- .evtid = QOS_L3_MBM_TOTAL_EVENT_ID,
+void resctrl_enable_mon_event(enum resctrl_event_id eventid)
+{
+ if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS))
+ return;
+ if (mon_event_all[eventid].enabled) {
+ pr_warn("Duplicate enable for event %d\n", eventid);
+ return;
+ }
+
+ mon_event_all[eventid].enabled = true;
+}
+
+bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
+{
+ return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
+ mon_event_all[eventid].enabled;
+}
+
+u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)
+{
+ return mon_event_all[evtid].evt_cfg;
+}
+
+/**
+ * struct mbm_transaction - Memory transaction an MBM event can be configured with.
+ * @name: Name of memory transaction (read, write ...).
+ * @val: The bit (e.g. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to
+ * represent the memory transaction within an event's configuration.
+ */
+struct mbm_transaction {
+ char name[32];
+ u32 val;
};
-static struct mon_evt mbm_local_event = {
- .name = "mbm_local_bytes",
- .evtid = QOS_L3_MBM_LOCAL_EVENT_ID,
+/* Decoded values for each type of memory transaction. */
+static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = {
+ {"local_reads", READS_TO_LOCAL_MEM},
+ {"remote_reads", READS_TO_REMOTE_MEM},
+ {"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM},
+ {"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM},
+ {"local_reads_slow_memory", READS_TO_LOCAL_S_MEM},
+ {"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM},
+ {"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM},
};
+int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
+{
+ struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r;
+ bool sep = false;
+ int ret = 0, i;
+
+ mutex_lock(&rdtgroup_mutex);
+ rdt_last_cmd_clear();
+
+ r = resctrl_arch_get_resource(mevt->rid);
+ if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
+ if (mevt->evt_cfg & mbm_transactions[i].val) {
+ if (sep)
+ seq_putc(seq, ',');
+ seq_printf(seq, "%s", mbm_transactions[i].name);
+ sep = true;
+ }
+ }
+ seq_putc(seq, '\n');
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return ret;
+}
+
+int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s,
+ void *v)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ int ret = 0;
+
+ mutex_lock(&rdtgroup_mutex);
+ rdt_last_cmd_clear();
+
+ if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return ret;
+}
+
+ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ bool value;
+ int ret;
+
+ ret = kstrtobool(buf, &value);
+ if (ret)
+ return ret;
+
+ mutex_lock(&rdtgroup_mutex);
+ rdt_last_cmd_clear();
+
+ if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ r->mon.mbm_assign_on_mkdir = value;
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return ret ?: nbytes;
+}
+
+/*
+ * mbm_cntr_free_all() - Clear all the counter ID configuration details in the
+ * domain @d. Called when mbm_assign_mode is changed.
+ */
+static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs);
+}
+
+/*
+ * resctrl_reset_rmid_all() - Reset all non-architecture states for all the
+ * supported RMIDs.
+ */
+static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+ enum resctrl_event_id evt;
+ int idx;
+
+ for_each_mbm_event_id(evt) {
+ if (!resctrl_is_mon_event_enabled(evt))
+ continue;
+ idx = MBM_STATE_IDX(evt);
+ memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit);
+ }
+}
+
+/*
+ * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID
+ * pair in the domain.
+ *
+ * Assign the counter if @assign is true else unassign the counter. Reset the
+ * associated non-architectural state.
+ */
+static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ enum resctrl_event_id evtid, u32 rmid, u32 closid,
+ u32 cntr_id, bool assign)
+{
+ struct mbm_state *m;
+
+ resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
+
+ m = get_mbm_state(d, closid, rmid, evtid);
+ if (m)
+ memset(m, 0, sizeof(*m));
+}
+
+/*
+ * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event
+ * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d.
+ *
+ * Return:
+ * 0 on success, < 0 on failure.
+ */
+static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ struct rdtgroup *rdtgrp, struct mon_evt *mevt)
+{
+ int cntr_id;
+
+ /* No action required if the counter is assigned already. */
+ cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
+ if (cntr_id >= 0)
+ return 0;
+
+ cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid);
+ if (cntr_id < 0) {
+ rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n",
+ mevt->name, d->hdr.id);
+ return cntr_id;
+ }
+
+ rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true);
+
+ return 0;
+}
+
/*
- * Initialize the event list for the resource.
+ * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in
+ * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is
+ * NULL; otherwise, assign the counter to the specified domain @d.
+ *
+ * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
+ * will fail. The assignment process will abort at the first failure encountered
+ * during domain traversal, which may result in the event being only partially
+ * assigned.
*
- * Note that MBM events are also part of RDT_RESOURCE_L3 resource
- * because as per the SDM the total and local memory bandwidth
- * are enumerated as part of L3 monitoring.
+ * Return:
+ * 0 on success, < 0 on failure.
+ */
+static int rdtgroup_assign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ struct mon_evt *mevt)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
+ int ret = 0;
+
+ if (!d) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
+ if (ret)
+ return ret;
+ }
+ } else {
+ ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
+ }
+
+ return ret;
+}
+
+/*
+ * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when
+ * a new group is created.
+ *
+ * Each group can accommodate two counters per domain: one for the total
+ * event and one for the local event. Assignments may fail due to the limited
+ * number of counters. However, a failed assignment need not fail group
+ * creation, so no error is returned. Users can still modify the counter
+ * assignments after the group has been created.
+ */
+void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+
+ if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) ||
+ !r->mon.mbm_assign_on_mkdir)
+ return;
+
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
+ rdtgroup_assign_cntr_event(NULL, rdtgrp,
+ &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
+
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
+ rdtgroup_assign_cntr_event(NULL, rdtgrp,
+ &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
+}
+
+/*
+ * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration
+ * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp.
+ */
+static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ struct rdtgroup *rdtgrp, struct mon_evt *mevt)
+{
+ int cntr_id;
+
+ cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
+
+ /* If there is no cntr_id assigned, nothing to do */
+ if (cntr_id < 0)
+ return;
+
+ rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false);
+
+ mbm_cntr_free(d, cntr_id);
+}
+
+/*
+ * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with
+ * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign
+ * the counters from all the domains if @d is NULL, else only from @d.
+ */
+static void rdtgroup_unassign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
+ struct mon_evt *mevt)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
+
+ if (!d) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list)
+ rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
+ } else {
+ rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
+ }
+}
+
+/*
+ * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events.
+ * Called when a group is deleted.
*/
-static void l3_mon_evt_init(struct rdt_resource *r)
+void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
{
- INIT_LIST_HEAD(&r->evt_list);
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- if (resctrl_arch_is_llc_occupancy_enabled())
- list_add_tail(&llc_occupancy_event.list, &r->evt_list);
- if (resctrl_arch_is_mbm_total_enabled())
- list_add_tail(&mbm_total_event.list, &r->evt_list);
- if (resctrl_arch_is_mbm_local_enabled())
- list_add_tail(&mbm_local_event.list, &r->evt_list);
+ if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r))
+ return;
+
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
+ rdtgroup_unassign_cntr_event(NULL, rdtgrp,
+ &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
+
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
+ rdtgroup_unassign_cntr_event(NULL, rdtgrp,
+ &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
+}
+
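+/*
+ * resctrl_parse_mem_transactions() - Parse a comma-separated list of memory
+ * transaction names (the names in the mbm_transactions[] table, e.g.
+ * "local_reads,remote_reads") into the matching configuration bitmask in
+ * @val.
+ */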
+static int resctrl_parse_mem_transactions(char *tok, u32 *val)
+{
+ u32 temp_val = 0;
+ char *evt_str;
+ bool found;
+ int i;
+
+next_config:
+ if (!tok || tok[0] == '\0') {
+ *val = temp_val;
+ return 0;
+ }
+
+ /* Start processing the strings for each memory transaction type */
+ evt_str = strim(strsep(&tok, ","));
+ found = false;
+ for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
+ if (!strcmp(mbm_transactions[i].name, evt_str)) {
+ temp_val |= mbm_transactions[i].val;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ rdt_last_cmd_printf("Invalid memory transaction type %s\n", evt_str);
+ return -EINVAL;
+ }
+
+ goto next_config;
+}
+
+/*
+ * rdtgroup_update_cntr_event() - Update the counter assignments for the event
+ * in a group.
+ * @r: Resource to be updated.
+ * @rdtgrp: Resctrl group.
+ * @evtid: MBM monitor event.
+ */
+static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
+ enum resctrl_event_id evtid)
+{
+ struct rdt_mon_domain *d;
+ int cntr_id;
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
+ if (cntr_id >= 0)
+ rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid,
+ rdtgrp->closid, cntr_id, true);
+ }
+}
+
+/*
+ * resctrl_update_cntr_allrdtgrp() - Update the counter assignments for the
+ * event across all groups.
+ * @mevt: MBM monitor event.
+ */
+static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
+ struct rdtgroup *prgrp, *crgrp;
+
+ /*
+ * Find all the groups where the event is assigned and update the
+ * configuration of existing assignments.
+ */
+ list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+ rdtgroup_update_cntr_event(r, prgrp, mevt->evtid);
+
+ list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
+ rdtgroup_update_cntr_event(r, crgrp, mevt->evtid);
+ }
+}
+
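+/*
+ * event_filter_write() - Update the memory transactions tracked by an MBM
+ * event from a comma-separated list of transaction names, then re-program
+ * all existing counter assignments with the new configuration.
+ */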
+ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
+ loff_t off)
+{
+ struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
+ struct rdt_resource *r;
+ u32 evt_cfg = 0;
+ int ret = 0;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+
+ buf[nbytes - 1] = '\0';
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ r = resctrl_arch_get_resource(mevt->rid);
+ if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
+ if (!ret && mevt->evt_cfg != evt_cfg) {
+ mevt->evt_cfg = evt_cfg;
+ resctrl_update_cntr_allrdtgrp(mevt);
+ }
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ return ret ?: nbytes;
+}
+
+int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ bool enabled;
+
+ mutex_lock(&rdtgroup_mutex);
+ enabled = resctrl_arch_mbm_cntr_assign_enabled(r);
+
+ if (r->mon.mbm_cntr_assignable) {
+ if (enabled)
+ seq_puts(s, "[mbm_event]\n");
+ else
+ seq_puts(s, "[default]\n");
+
+ if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
+ if (enabled)
+ seq_puts(s, "default\n");
+ else
+ seq_puts(s, "mbm_event\n");
+ }
+ } else {
+ seq_puts(s, "[default]\n");
+ }
+
+ mutex_unlock(&rdtgroup_mutex);
+
+ return 0;
+}
+
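+/*
+ * resctrl_mbm_assign_mode_write() - Switch between the "default" and
+ * "mbm_event" assignment modes. A mode change resets all assignable
+ * counters and the non-architectural RMID state in every domain.
+ */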
+ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ struct rdt_mon_domain *d;
+ int ret = 0;
+ bool enable;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+
+ buf[nbytes - 1] = '\0';
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ if (!strcmp(buf, "default")) {
+ enable = 0;
+ } else if (!strcmp(buf, "mbm_event")) {
+ if (r->mon.mbm_cntr_assignable) {
+ enable = 1;
+ } else {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("mbm_event mode is not supported\n");
+ goto out_unlock;
+ }
+ } else {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("Unsupported assign mode\n");
+ goto out_unlock;
+ }
+
+ if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
+ if (ret)
+ goto out_unlock;
+
+ /* Update the visibility of BMEC related files */
+ resctrl_bmec_files_show(r, NULL, !enable);
+
+ /*
+ * Initialize the default memory transaction values for
+ * total and local events.
+ */
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
+ mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
+ mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
+ (READS_TO_LOCAL_MEM |
+ READS_TO_LOCAL_S_MEM |
+ NON_TEMP_WRITE_TO_LOCAL_MEM);
+ /* Enable auto assignment when switching to "mbm_event" mode */
+ if (enable)
+ r->mon.mbm_assign_on_mkdir = true;
+ /*
+ * Reset all the non-architectural RMID state and assignable counters.
+ */
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ mbm_cntr_free_all(r, d);
+ resctrl_reset_rmid_all(r, d);
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ return ret ?: nbytes;
+}
+
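+/*
+ * resctrl_num_mbm_cntrs_show() - Show the number of assignable counters in
+ * each domain as "<domain id>=<count>" pairs separated by ';', e.g.
+ * "0=32;1=32" on a system with 32 counters per domain (count shown here
+ * for illustration only).
+ */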
+int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ struct rdt_mon_domain *dom;
+ bool sep = false;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
+ if (sep)
+ seq_putc(s, ';');
+
+ seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs);
+ sep = true;
+ }
+ seq_putc(s, '\n');
+
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+ return 0;
+}
+
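+/*
+ * resctrl_available_mbm_cntrs_show() - Show the number of currently
+ * unassigned counters in each domain, in the same "<domain id>=<count>"
+ * format as num_mbm_cntrs.
+ */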
+int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
+ struct rdt_mon_domain *dom;
+ bool sep = false;
+ u32 cntrs, i;
+ int ret = 0;
+
+ cpus_read_lock();
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
+ if (sep)
+ seq_putc(s, ';');
+
+ cntrs = 0;
+ for (i = 0; i < r->mon.num_mbm_cntrs; i++) {
+ if (!dom->cntr_cfg[i].rdtgrp)
+ cntrs++;
+ }
+
+ seq_printf(s, "%d=%u", dom->hdr.id, cntrs);
+ sep = true;
+ }
+ seq_putc(s, '\n');
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
+ return ret;
+}
+
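+/*
+ * mbm_L3_assignments_show() - Show the per-domain assignment state of each
+ * enabled MBM event, one event per line, e.g. "mbm_total_bytes:0=e;1=_"
+ * where 'e' means a counter is assigned and '_' means none is.
+ */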
+int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ struct rdt_mon_domain *d;
+ struct rdtgroup *rdtgrp;
+ struct mon_evt *mevt;
+ int ret = 0;
+ bool sep;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+
+ rdt_last_cmd_clear();
+ if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ for_each_mon_event(mevt) {
+ if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
+ continue;
+
+ sep = false;
+ seq_printf(s, "%s:", mevt->name);
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (sep)
+ seq_putc(s, ';');
+
+ if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0)
+ seq_printf(s, "%d=_", d->hdr.id);
+ else
+ seq_printf(s, "%d=e", d->hdr.id);
+
+ sep = true;
+ }
+ seq_putc(s, '\n');
+ }
+
+out_unlock:
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+/*
+ * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching
+ * event name.
+ */
+static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name)
+{
+ struct mon_evt *mevt;
+
+ for_each_mon_event(mevt) {
+ if (mevt->rid == r->rid && mevt->enabled &&
+ resctrl_is_mbm_event(mevt->evtid) &&
+ !strcmp(mevt->name, name))
+ return mevt;
+ }
+
+ return NULL;
+}
+
+static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d,
+ struct rdtgroup *rdtgrp, struct mon_evt *mevt)
+{
+ int ret = 0;
+
+ if (!assign || strlen(assign) != 1)
+ return -EINVAL;
+
+ switch (*assign) {
+ case 'e':
+ ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt);
+ break;
+ case '_':
+ rdtgroup_unassign_cntr_event(d, rdtgrp, mevt);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp,
+ char *event, char *tok)
+{
+ struct rdt_mon_domain *d;
+ unsigned long dom_id = 0;
+ char *dom_str, *id_str;
+ struct mon_evt *mevt;
+ int ret;
+
+ mevt = mbm_get_mon_event_by_name(r, event);
+ if (!mevt) {
+ rdt_last_cmd_printf("Invalid event %s\n", event);
+ return -ENOENT;
+ }
+
+next:
+ if (!tok || tok[0] == '\0')
+ return 0;
+
+ /* Start processing the strings for each domain */
+ dom_str = strim(strsep(&tok, ";"));
+
+ id_str = strsep(&dom_str, "=");
+
+ /* Check for domain id '*' which means all domains */
+ if (id_str && *id_str == '*') {
+ ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt);
+ if (ret)
+ rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n",
+ event, dom_str);
+ return ret;
+ } else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
+ rdt_last_cmd_puts("Missing domain id\n");
+ return -EINVAL;
+ }
+
+ /* Verify that the dom_id is valid */
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
+ ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt);
+ if (ret) {
+ rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n",
+ event, dom_id, dom_str);
+ return ret;
+ }
+ goto next;
+ }
+ }
+
+ rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
+ return -EINVAL;
+}
+
+ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
+ struct rdtgroup *rdtgrp;
+ char *token, *event;
+ int ret = 0;
+
+ /* Valid input requires a trailing newline */
+ if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ return -EINVAL;
+
+ buf[nbytes - 1] = '\0';
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ rdtgroup_kn_unlock(of->kn);
+ return -ENOENT;
+ }
+ rdt_last_cmd_clear();
+
+ if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ rdt_last_cmd_puts("mbm_event mode is not enabled\n");
+ rdtgroup_kn_unlock(of->kn);
+ return -EINVAL;
+ }
+
+ while ((token = strsep(&buf, "\n")) != NULL) {
+ /*
+ * The write command uses the following format:
+ * "<Event>:<Domain ID>=<Assignment state>"
+ * Extract the event name first.
+ */
+ event = strsep(&token, ":");
+
+ ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token);
+ if (ret)
+ break;
+ }
+
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret ?: nbytes;
}
/**
@@ -900,24 +1765,43 @@ int resctrl_mon_resource_init(void)
if (ret)
return ret;
- l3_mon_evt_init(r);
-
if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
- mbm_total_event.configurable = true;
+ mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
resctrl_file_fflags_init("mbm_total_bytes_config",
RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
}
if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
- mbm_local_event.configurable = true;
+ mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
resctrl_file_fflags_init("mbm_local_bytes_config",
RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
}
- if (resctrl_arch_is_mbm_local_enabled())
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
- else if (resctrl_arch_is_mbm_total_enabled())
+ else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
+ if (r->mon.mbm_cntr_assignable) {
+ if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
+ resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID);
+ if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
+ resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID);
+ mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
+ mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
+ (READS_TO_LOCAL_MEM |
+ READS_TO_LOCAL_S_MEM |
+ NON_TEMP_WRITE_TO_LOCAL_MEM);
+ r->mon.mbm_assign_on_mkdir = true;
+ resctrl_file_fflags_init("num_mbm_cntrs",
+ RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
+ resctrl_file_fflags_init("available_mbm_cntrs",
+ RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
+ resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
+ resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
+ RFTYPE_RES_CACHE);
+ resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
+ }
+
return 0;
}
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 77d08229d855..0320360cd7a6 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -123,14 +123,8 @@ void rdt_staged_configs_clear(void)
static bool resctrl_is_mbm_enabled(void)
{
- return (resctrl_arch_is_mbm_total_enabled() ||
- resctrl_arch_is_mbm_local_enabled());
-}
-
-static bool resctrl_is_mbm_event(int e)
-{
- return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
- e <= QOS_L3_MBM_LOCAL_EVENT_ID);
+ return (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID) ||
+ resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID));
}
/*
@@ -196,7 +190,7 @@ static int closid_alloc(void)
lockdep_assert_held(&rdtgroup_mutex);
if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
- resctrl_arch_is_llc_occupancy_enabled()) {
+ resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) {
cleanest_closid = resctrl_find_cleanest_closid();
if (cleanest_closid < 0)
return cleanest_closid;
@@ -981,7 +975,7 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
return 0;
}
-static void *rdt_kn_parent_priv(struct kernfs_node *kn)
+void *rdt_kn_parent_priv(struct kernfs_node *kn)
{
/*
* The parent pointer is only valid within RCU section since it can be
@@ -1141,7 +1135,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of,
{
struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- seq_printf(seq, "%d\n", r->num_rmid);
+ seq_printf(seq, "%d\n", r->mon.num_rmid);
return 0;
}
@@ -1152,9 +1146,12 @@ static int rdt_mon_features_show(struct kernfs_open_file *of,
struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
struct mon_evt *mevt;
- list_for_each_entry(mevt, &r->evt_list, list) {
+ for_each_mon_event(mevt) {
+ if (mevt->rid != r->rid || !mevt->enabled)
+ continue;
seq_printf(seq, "%s\n", mevt->name);
- if (mevt->configurable)
+ if (mevt->configurable &&
+ !resctrl_arch_mbm_cntr_assign_enabled(r))
seq_printf(seq, "%s_config\n", mevt->name);
}
@@ -1735,9 +1732,9 @@ next:
}
/* Value from user cannot be more than the supported set of events */
- if ((val & r->mbm_cfg_mask) != val) {
+ if ((val & r->mon.mbm_cfg_mask) != val) {
rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
- r->mbm_cfg_mask);
+ r->mon.mbm_cfg_mask);
return -EINVAL;
}
@@ -1803,6 +1800,44 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
return ret ?: nbytes;
}
+/*
+ * resctrl_bmec_files_show() - Controls the visibility of BMEC-related resctrl
+ * files. When @show is true, the files are displayed; when false, the files
+ * are hidden.
+ * Don't treat kernfs_find_and_get failure as an error, since this function may
+ * be called regardless of whether BMEC is supported or the event is enabled.
+ */
+void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn,
+ bool show)
+{
+ struct kernfs_node *kn_config, *mon_kn = NULL;
+ char name[32];
+
+ if (!l3_mon_kn) {
+ sprintf(name, "%s_MON", r->name);
+ mon_kn = kernfs_find_and_get(kn_info, name);
+ if (!mon_kn)
+ return;
+ l3_mon_kn = mon_kn;
+ }
+
+ kn_config = kernfs_find_and_get(l3_mon_kn, "mbm_total_bytes_config");
+ if (kn_config) {
+ kernfs_show(kn_config, show);
+ kernfs_put(kn_config);
+ }
+
+ kn_config = kernfs_find_and_get(l3_mon_kn, "mbm_local_bytes_config");
+ if (kn_config) {
+ kernfs_show(kn_config, show);
+ kernfs_put(kn_config);
+ }
+
+ /* Release the reference only if it was acquired */
+ if (mon_kn)
+ kernfs_put(mon_kn);
+}
+
/* rdtgroup information files for one cache resource. */
static struct rftype res_common_files[] = {
{
@@ -1813,6 +1848,13 @@ static struct rftype res_common_files[] = {
.fflags = RFTYPE_TOP_INFO,
},
{
+ .name = "mbm_assign_on_mkdir",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = resctrl_mbm_assign_on_mkdir_show,
+ .write = resctrl_mbm_assign_on_mkdir_write,
+ },
+ {
.name = "num_closids",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -1827,6 +1869,12 @@ static struct rftype res_common_files[] = {
.fflags = RFTYPE_MON_INFO,
},
{
+ .name = "available_mbm_cntrs",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = resctrl_available_mbm_cntrs_show,
+ },
+ {
.name = "num_rmids",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -1841,6 +1889,12 @@ static struct rftype res_common_files[] = {
.fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
+ .name = "num_mbm_cntrs",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = resctrl_num_mbm_cntrs_show,
+ },
+ {
.name = "min_cbm_bits",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -1916,6 +1970,28 @@ static struct rftype res_common_files[] = {
.write = mbm_local_bytes_config_write,
},
{
+ .name = "event_filter",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = event_filter_show,
+ .write = event_filter_write,
+ },
+ {
+ .name = "mbm_L3_assignments",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = mbm_L3_assignments_show,
+ .write = mbm_L3_assignments_write,
+ },
+ {
+ .name = "mbm_assign_mode",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = resctrl_mbm_assign_mode_show,
+ .write = resctrl_mbm_assign_mode_write,
+ .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
+ },
+ {
.name = "cpus",
.mode = 0644,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -2168,10 +2244,48 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
return ret;
}
+static int resctrl_mkdir_event_configs(struct rdt_resource *r, struct kernfs_node *l3_mon_kn)
+{
+ struct kernfs_node *kn_subdir, *kn_subdir2;
+ struct mon_evt *mevt;
+ int ret;
+
+ kn_subdir = kernfs_create_dir(l3_mon_kn, "event_configs", l3_mon_kn->mode, NULL);
+ if (IS_ERR(kn_subdir))
+ return PTR_ERR(kn_subdir);
+
+ ret = rdtgroup_kn_set_ugid(kn_subdir);
+ if (ret)
+ return ret;
+
+ for_each_mon_event(mevt) {
+ if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
+ continue;
+
+ kn_subdir2 = kernfs_create_dir(kn_subdir, mevt->name, kn_subdir->mode, mevt);
+ if (IS_ERR(kn_subdir2)) {
+ ret = PTR_ERR(kn_subdir2);
+ goto out;
+ }
+
+ ret = rdtgroup_kn_set_ugid(kn_subdir2);
+ if (ret)
+ goto out;
+
+ ret = rdtgroup_add_files(kn_subdir2, RFTYPE_ASSIGN_CONFIG);
+ if (ret)
+ break;
+ }
+
+out:
+ return ret;
+}
+
static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
unsigned long fflags)
{
struct kernfs_node *kn_subdir;
+ struct rdt_resource *r;
int ret;
kn_subdir = kernfs_create_dir(kn_info, name,
@@ -2184,8 +2298,25 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
return ret;
ret = rdtgroup_add_files(kn_subdir, fflags);
- if (!ret)
- kernfs_activate(kn_subdir);
+ if (ret)
+ return ret;
+
+ if ((fflags & RFTYPE_MON_INFO) == RFTYPE_MON_INFO) {
+ r = priv;
+ if (r->mon.mbm_cntr_assignable) {
+ ret = resctrl_mkdir_event_configs(r, kn_subdir);
+ if (ret)
+ return ret;
+ /*
+ * Hide BMEC-related files if mbm_event mode
+ * is enabled.
+ */
+ if (resctrl_arch_mbm_cntr_assign_enabled(r))
+ resctrl_bmec_files_show(r, kn_subdir, false);
+ }
+ }
+
+ kernfs_activate(kn_subdir);
return ret;
}
@@ -2608,10 +2739,8 @@ static int rdt_get_tree(struct fs_context *fc)
goto out_root;
ret = schemata_list_create();
- if (ret) {
- schemata_list_destroy();
- goto out_ctx;
- }
+ if (ret)
+ goto out_schemata_free;
ret = closid_init();
if (ret)
@@ -2637,6 +2766,8 @@ static int rdt_get_tree(struct fs_context *fc)
if (ret < 0)
goto out_info;
+ rdtgroup_assign_cntrs(&rdtgroup_default);
+
ret = mkdir_mondata_all(rdtgroup_default.kn,
&rdtgroup_default, &kn_mondata);
if (ret < 0)
@@ -2675,15 +2806,16 @@ out_mondata:
if (resctrl_arch_mon_capable())
kernfs_remove(kn_mondata);
out_mongrp:
- if (resctrl_arch_mon_capable())
+ if (resctrl_arch_mon_capable()) {
+ rdtgroup_unassign_cntrs(&rdtgroup_default);
kernfs_remove(kn_mongrp);
+ }
out_info:
kernfs_remove(kn_info);
out_closid_exit:
closid_exit();
out_schemata_free:
schemata_list_destroy();
-out_ctx:
rdt_disable_ctx();
out_root:
rdtgroup_destroy_root();
@@ -2822,6 +2954,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
head = &rdtgrp->mon.crdtgrp_list;
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
+ rdtgroup_unassign_cntrs(sentry);
free_rmid(sentry->closid, sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);
@@ -2862,6 +2995,8 @@ static void rmdir_all_sub(void)
cpumask_or(&rdtgroup_default.cpu_mask,
&rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
+ rdtgroup_unassign_cntrs(rdtgrp);
+
free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
kernfs_remove(rdtgrp->kn);
@@ -2946,6 +3081,7 @@ static void resctrl_fs_teardown(void)
return;
rmdir_all_sub();
+ rdtgroup_unassign_cntrs(&rdtgroup_default);
mon_put_kn_priv();
rdt_pseudo_lock_release();
rdtgroup_default.mode = RDT_MODE_SHAREABLE;
@@ -3057,10 +3193,9 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
struct mon_evt *mevt;
int ret, domid;
- if (WARN_ON(list_empty(&r->evt_list)))
- return -EPERM;
-
- list_for_each_entry(mevt, &r->evt_list, list) {
+ for_each_mon_event(mevt) {
+ if (mevt->rid != r->rid || !mevt->enabled)
+ continue;
domid = do_sum ? d->ci_id : d->hdr.id;
priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
if (WARN_ON_ONCE(!priv))
@@ -3427,9 +3562,12 @@ static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
}
rdtgrp->mon.rmid = ret;
+ rdtgroup_assign_cntrs(rdtgrp);
+
ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
if (ret) {
rdt_last_cmd_puts("kernfs subdir error\n");
+ rdtgroup_unassign_cntrs(rdtgrp);
free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
return ret;
}
@@ -3439,8 +3577,10 @@ static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
{
- if (resctrl_arch_mon_capable())
+ if (resctrl_arch_mon_capable()) {
+ rdtgroup_unassign_cntrs(rgrp);
free_rmid(rgrp->closid, rgrp->mon.rmid);
+ }
}
/*
@@ -3716,6 +3856,9 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
update_closid_rmid(tmpmask, NULL);
rdtgrp->flags = RDT_DELETED;
+
+ rdtgroup_unassign_cntrs(rdtgrp);
+
free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
/*
@@ -3763,6 +3906,8 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
update_closid_rmid(tmpmask, NULL);
+ rdtgroup_unassign_cntrs(rdtgrp);
+
free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
closid_free(rdtgrp->closid);
@@ -4022,9 +4167,14 @@ static void rdtgroup_setup_default(void)
static void domain_destroy_mon_state(struct rdt_mon_domain *d)
{
+ int idx;
+
+ kfree(d->cntr_cfg);
bitmap_free(d->rmid_busy_llc);
- kfree(d->mbm_total);
- kfree(d->mbm_local);
+ for_each_mbm_idx(idx) {
+ kfree(d->mbm_states[idx]);
+ d->mbm_states[idx] = NULL;
+ }
}
void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
@@ -4050,7 +4200,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
if (resctrl_is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
- if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
+ if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && has_busy_rmid(d)) {
/*
* When a package is going down, forcefully
* decrement rmid->ebusy. There is no way to know
@@ -4084,32 +4234,41 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
- size_t tsize;
+ size_t tsize = sizeof(*d->mbm_states[0]);
+ enum resctrl_event_id eventid;
+ int idx;
- if (resctrl_arch_is_llc_occupancy_enabled()) {
+ if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) {
d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
- if (resctrl_arch_is_mbm_total_enabled()) {
- tsize = sizeof(*d->mbm_total);
- d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
- if (!d->mbm_total) {
- bitmap_free(d->rmid_busy_llc);
- return -ENOMEM;
- }
+
+ for_each_mbm_event_id(eventid) {
+ if (!resctrl_is_mon_event_enabled(eventid))
+ continue;
+ idx = MBM_STATE_IDX(eventid);
+ d->mbm_states[idx] = kcalloc(idx_limit, tsize, GFP_KERNEL);
+ if (!d->mbm_states[idx])
+ goto cleanup;
}
- if (resctrl_arch_is_mbm_local_enabled()) {
- tsize = sizeof(*d->mbm_local);
- d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
- if (!d->mbm_local) {
- bitmap_free(d->rmid_busy_llc);
- kfree(d->mbm_total);
- return -ENOMEM;
- }
+
+ if (resctrl_is_mbm_enabled() && r->mon.mbm_cntr_assignable) {
+ tsize = sizeof(*d->cntr_cfg);
+ d->cntr_cfg = kcalloc(r->mon.num_mbm_cntrs, tsize, GFP_KERNEL);
+ if (!d->cntr_cfg)
+ goto cleanup;
}
return 0;
+cleanup:
+ bitmap_free(d->rmid_busy_llc);
+ for_each_mbm_idx(idx) {
+ kfree(d->mbm_states[idx]);
+ d->mbm_states[idx] = NULL;
+ }
+
+ return -ENOMEM;
}
int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
@@ -4144,7 +4303,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
RESCTRL_PICK_ANY_CPU);
}
- if (resctrl_arch_is_llc_occupancy_enabled())
+ if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
/*
@@ -4219,7 +4378,7 @@ void resctrl_offline_cpu(unsigned int cpu)
cancel_delayed_work(&d->mbm_over);
mbm_setup_overflow_handler(d, 0, cpu);
}
- if (resctrl_arch_is_llc_occupancy_enabled() &&
+ if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) &&
cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
cancel_delayed_work(&d->cqm_limbo);
cqm_setup_limbo_handler(d, 0, cpu);
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 6fb4894b8cfd..a7d92718b653 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -157,27 +157,42 @@ struct rdt_ctrl_domain {
};
/**
+ * struct mbm_cntr_cfg - Assignable counter configuration.
+ * @evtid: MBM event to which the counter is assigned. Only valid
+ * if @rdtgrp is not NULL.
+ * @rdtgrp: resctrl group assigned to the counter. NULL if the
+ * counter is free.
+ */
+struct mbm_cntr_cfg {
+ enum resctrl_event_id evtid;
+ struct rdtgroup *rdtgrp;
+};
+
+/**
* struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
* @hdr: common header for different domain types
* @ci_id: cache info id for this domain
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
- * @mbm_total: saved state for MBM total bandwidth
- * @mbm_local: saved state for MBM local bandwidth
+ * @mbm_states: Per-event pointer to the MBM event's saved state.
+ * An MBM event's state is an array of struct mbm_state
+ * indexed by RMID on x86 or combined CLOSID, RMID on Arm.
* @mbm_over: worker to periodically read MBM h/w counters
* @cqm_limbo: worker to periodically read CQM h/w counters
* @mbm_work_cpu: worker CPU for MBM h/w counters
* @cqm_work_cpu: worker CPU for CQM h/w counters
+ * @cntr_cfg: array of assignable counters' configuration (indexed
+ * by counter ID)
*/
struct rdt_mon_domain {
struct rdt_domain_hdr hdr;
unsigned int ci_id;
unsigned long *rmid_busy_llc;
- struct mbm_state *mbm_total;
- struct mbm_state *mbm_local;
+ struct mbm_state *mbm_states[QOS_NUM_L3_MBM_EVENTS];
struct delayed_work mbm_over;
struct delayed_work cqm_limbo;
int mbm_work_cpu;
int cqm_work_cpu;
+ struct mbm_cntr_cfg *cntr_cfg;
};
/**
@@ -256,39 +271,52 @@ enum resctrl_schema_fmt {
};
/**
+ * struct resctrl_mon - Monitoring related data of a resctrl resource.
+ * @num_rmid: Number of RMIDs available.
+ * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth
+ * monitoring events are configurable.
+ * @num_mbm_cntrs: Number of assignable counters.
+ * @mbm_cntr_assignable: Is the system capable of supporting counter assignment?
+ * @mbm_assign_on_mkdir: True if counters should automatically be assigned to MBM
+ * events of monitor groups created via mkdir.
+ */
+struct resctrl_mon {
+ int num_rmid;
+ unsigned int mbm_cfg_mask;
+ int num_mbm_cntrs;
+ bool mbm_cntr_assignable;
+ bool mbm_assign_on_mkdir;
+};
+
+/**
* struct rdt_resource - attributes of a resctrl resource
* @rid: The index of the resource
* @alloc_capable: Is allocation available on this machine
* @mon_capable: Is monitor feature available on this machine
- * @num_rmid: Number of RMIDs available
* @ctrl_scope: Scope of this resource for control functions
* @mon_scope: Scope of this resource for monitor functions
* @cache: Cache allocation related data
* @membw: If the component has bandwidth controls, their properties.
+ * @mon: Monitoring related data.
* @ctrl_domains: RCU list of all control domains for this resource
* @mon_domains: RCU list of all monitor domains for this resource
* @name: Name to use in "schemata" file.
* @schema_fmt: Which format string and parser is used for this schema.
- * @evt_list: List of monitoring events
- * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
- * monitoring events can be configured.
* @cdp_capable: Is the CDP feature available on this resource
*/
struct rdt_resource {
int rid;
bool alloc_capable;
bool mon_capable;
- int num_rmid;
enum resctrl_scope ctrl_scope;
enum resctrl_scope mon_scope;
struct resctrl_cache cache;
struct resctrl_membw membw;
+ struct resctrl_mon mon;
struct list_head ctrl_domains;
struct list_head mon_domains;
char *name;
enum resctrl_schema_fmt schema_fmt;
- struct list_head evt_list;
- unsigned int mbm_cfg_mask;
bool cdp_capable;
};
@@ -372,8 +400,29 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
u32 resctrl_arch_system_num_rmid_idx(void);
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
+void resctrl_enable_mon_event(enum resctrl_event_id eventid);
+
+bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid);
+
bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
+static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid)
+{
+ return (eventid >= QOS_L3_MBM_TOTAL_EVENT_ID &&
+ eventid <= QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
+u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id eventid);
+
+/* Iterate over all memory bandwidth events */
+#define for_each_mbm_event_id(eventid) \
+ for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \
+ eventid <= QOS_L3_MBM_LOCAL_EVENT_ID; eventid++)
+
+/* Iterate over memory bandwidth arrays in domain structures */
+#define for_each_mbm_idx(idx) \
+ for (idx = 0; idx < QOS_NUM_L3_MBM_EVENTS; idx++)
+
/**
* resctrl_arch_mon_event_config_write() - Write the config for an event.
* @config_info: struct resctrl_mon_config_info describing the resource, domain
@@ -416,6 +465,26 @@ static inline u32 resctrl_get_config_index(u32 closid,
bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l);
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+/**
+ * resctrl_arch_mbm_cntr_assign_enabled() - Check if MBM counter assignment
+ * mode is enabled.
+ * @r: Pointer to the resource structure.
+ *
+ * Return:
+ * true if the assignment mode is enabled, false otherwise.
+ */
+bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r);
+
+/**
+ * resctrl_arch_mbm_cntr_assign_set() - Configure the MBM counter assignment mode.
+ * @r: Pointer to the resource structure.
+ * @enable: Set to true to enable, false to disable the assignment mode.
+ *
+ * Return:
+ * 0 on success, < 0 on error.
+ */
+int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable);
+
/*
* Update the ctrl_val and apply this config right now.
* Must be called on one of the domain's CPUs.
@@ -528,6 +597,63 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *
*/
void resctrl_arch_reset_all_ctrls(struct rdt_resource *r);
+/**
+ * resctrl_arch_config_cntr() - Configure the counter with its new RMID
+ * and event details.
+ * @r: Resource structure.
+ * @d: The domain in which counter with ID @cntr_id should be configured.
+ * @evtid: Monitoring event type (e.g., QOS_L3_MBM_TOTAL_EVENT_ID
+ * or QOS_L3_MBM_LOCAL_EVENT_ID).
+ * @rmid: RMID.
+ * @closid: CLOSID.
+ * @cntr_id: Counter ID to configure.
+ * @assign: True to assign the counter or update an existing assignment,
+ * false to unassign the counter.
+ *
+ * This can be called from any CPU.
+ */
+void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ enum resctrl_event_id evtid, u32 rmid, u32 closid,
+ u32 cntr_id, bool assign);
+
+/**
+ * resctrl_arch_cntr_read() - Read the event data corresponding to the counter ID
+ * assigned to the RMID, event pair for this resource
+ * and domain.
+ * @r: Resource that the counter should be read from.
+ * @d: Domain that the counter should be read from.
+ * @closid: CLOSID that matches the RMID.
+ * @rmid: The RMID to which @cntr_id is assigned.
+ * @cntr_id: The counter to read.
+ * @eventid: The MBM event to which @cntr_id is assigned.
+ * @val: Result of the counter read in bytes.
+ *
+ * Called on a CPU that belongs to domain @d when "mbm_event" mode is enabled.
+ * Called from a non-migratable process context via smp_call_on_cpu() unless all
+ * CPUs are nohz_full, in which case it is called via IPI (smp_call_function_any()).
+ *
+ * Return:
+ * 0 on success, or -EIO, -EINVAL etc on error.
+ */
+int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid, u64 *val);
+
+/**
+ * resctrl_arch_reset_cntr() - Reset any private state associated with counter ID.
+ * @r: The domain's resource.
+ * @d: The counter ID's domain.
+ * @closid: CLOSID that matches the RMID.
+ * @rmid: The RMID to which @cntr_id is assigned.
+ * @cntr_id: The counter to reset.
+ * @eventid: The MBM event to which @cntr_id is assigned.
+ *
+ * This can be called from any CPU.
+ */
+void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid);
+
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
index a25fb9c4070d..acfe07860b34 100644
--- a/include/linux/resctrl_types.h
+++ b/include/linux/resctrl_types.h
@@ -34,11 +34,18 @@
/* Max event bits supported */
#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
-/*
- * Event IDs, the values match those used to program IA32_QM_EVTSEL before
- * reading IA32_QM_CTR on RDT systems.
- */
+/* Number of memory transactions that an MBM event can be configured with */
+#define NUM_MBM_TRANSACTIONS 7
+
+/* Event IDs */
enum resctrl_event_id {
+ /* Must match value of first event below */
+ QOS_FIRST_EVENT = 0x01,
+
+ /*
+ * These values match those used to program IA32_QM_EVTSEL before
+ * reading IA32_QM_CTR on RDT systems.
+ */
QOS_L3_OCCUP_EVENT_ID = 0x01,
QOS_L3_MBM_TOTAL_EVENT_ID = 0x02,
QOS_L3_MBM_LOCAL_EVENT_ID = 0x03,
@@ -47,4 +54,7 @@ enum resctrl_event_id {
QOS_NUM_EVENTS,
};
+#define QOS_NUM_L3_MBM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID - QOS_L3_MBM_TOTAL_EVENT_ID + 1)
+#define MBM_STATE_IDX(evt) ((evt) - QOS_L3_MBM_TOTAL_EVENT_ID)
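+/* e.g. MBM_STATE_IDX(QOS_L3_MBM_LOCAL_EVENT_ID) == 1 */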
+
#endif /* __LINUX_RESCTRL_TYPES_H */