
release 6.12.4 (preliminary)

2024-12-10 06:44:25 +03:00
parent 9debc8729c
commit 407e7bac82
246 changed files with 4681 additions and 5758 deletions

View File

@@ -1,321 +0,0 @@
From 023d6b8aa8d8b346cfdcccf5ca4cb880c8d41d87 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:37 -0700
Subject: perf: Generic hotplug support for a PMU with a scope
The perf subsystem assumes that the counters of a PMU are per-CPU, so
in system-wide mode the user space tool reads a counter from each CPU.
However, many PMUs don't have a per-CPU counter. The counter is
effective for a scope, e.g., a die or a socket. To address this, the
kernel driver exposes a cpumask that restricts collection to one CPU
standing for the specific scope. In case that CPU is removed, hotplug
support has to be implemented in each such driver.
The code supporting the cpumask and hotplug is very similar across drivers:
- Expose a cpumask in sysfs
- Pick another CPU in the same scope if the given CPU is removed.
- Invoke perf_pmu_migrate_context() to migrate events to the new CPU.
- In event init, always set event->cpu to the CPU from the cpumask
This duplicated code is implemented in each such PMU driver. It
would be good to introduce a generic infrastructure to avoid the
duplication.
Five common scopes are implemented here: core, die, cluster, pkg, and
system-wide. The scope can be set when a PMU is registered; if it is, a
"cpumask" attribute is automatically exposed for the PMU.
The "cpumask" comes from the corresponding perf_online_<scope>_mask,
which tracks the active CPU for each scope. The masks are set when the
first CPU of a scope comes online via the generic perf hotplug support.
When that CPU is removed, the perf_online_<scope>_mask is updated
accordingly and the PMU is moved to a new CPU from the same scope
if possible.
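As a minimal sketch (a hypothetical "my_die_pmu" driver, using only the interfaces this patch introduces; the callbacks are assumed to exist elsewhere), a per-die PMU would now just declare its scope and let the core expose the "cpumask" attribute and migrate events on hotplug:

/* Hypothetical driver, not part of this patch; callbacks assumed elsewhere. */
static struct pmu my_die_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .scope          = PERF_PMU_SCOPE_DIE,   /* core exposes "cpumask", handles hotplug */
        .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
        .event_init     = my_die_pmu_event_init,
        .add            = my_die_pmu_add,
        .del            = my_die_pmu_del,
        .start          = my_die_pmu_start,
        .stop           = my_die_pmu_stop,
        .read           = my_die_pmu_read,
        .module         = THIS_MODULE,
};

/* perf_pmu_register() now rejects scope >= PERF_PMU_MAX_SCOPE with -EINVAL. */
err = perf_pmu_register(&my_die_pmu, "my_die_pmu", -1);

The "cpumask" attribute would then appear under /sys/bus/event_source/devices/my_die_pmu/, and the perf tool keeps consuming it exactly as it does for drivers that still hand-roll the mask.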
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
include/linux/perf_event.h | 18 ++++
kernel/events/core.c | 164 ++++++++++++++++++++++++++++++++++++-
2 files changed, 180 insertions(+), 2 deletions(-)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -292,6 +292,19 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
+/**
+ * pmu::scope
+ */
+enum perf_pmu_scope {
+ PERF_PMU_SCOPE_NONE = 0,
+ PERF_PMU_SCOPE_CORE,
+ PERF_PMU_SCOPE_DIE,
+ PERF_PMU_SCOPE_CLUSTER,
+ PERF_PMU_SCOPE_PKG,
+ PERF_PMU_SCOPE_SYS_WIDE,
+ PERF_PMU_MAX_SCOPE,
+};
+
struct perf_output_handle;
#define PMU_NULL_DEV ((void *)(~0UL))
@@ -315,6 +328,11 @@ struct pmu {
*/
int capabilities;
+ /*
+ * PMU scope
+ */
+ unsigned int scope;
+
int __percpu *pmu_disable_count;
struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -411,6 +411,11 @@ static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
static cpumask_var_t perf_online_mask;
+static cpumask_var_t perf_online_core_mask;
+static cpumask_var_t perf_online_die_mask;
+static cpumask_var_t perf_online_cluster_mask;
+static cpumask_var_t perf_online_pkg_mask;
+static cpumask_var_t perf_online_sys_mask;
static struct kmem_cache *perf_event_cache;
/*
@@ -11497,10 +11502,60 @@ perf_event_mux_interval_ms_store(struct
}
static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
+static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu)
+{
+ switch (scope) {
+ case PERF_PMU_SCOPE_CORE:
+ return topology_sibling_cpumask(cpu);
+ case PERF_PMU_SCOPE_DIE:
+ return topology_die_cpumask(cpu);
+ case PERF_PMU_SCOPE_CLUSTER:
+ return topology_cluster_cpumask(cpu);
+ case PERF_PMU_SCOPE_PKG:
+ return topology_core_cpumask(cpu);
+ case PERF_PMU_SCOPE_SYS_WIDE:
+ return cpu_online_mask;
+ }
+
+ return NULL;
+}
+
+static inline struct cpumask *perf_scope_cpumask(unsigned int scope)
+{
+ switch (scope) {
+ case PERF_PMU_SCOPE_CORE:
+ return perf_online_core_mask;
+ case PERF_PMU_SCOPE_DIE:
+ return perf_online_die_mask;
+ case PERF_PMU_SCOPE_CLUSTER:
+ return perf_online_cluster_mask;
+ case PERF_PMU_SCOPE_PKG:
+ return perf_online_pkg_mask;
+ case PERF_PMU_SCOPE_SYS_WIDE:
+ return perf_online_sys_mask;
+ }
+
+ return NULL;
+}
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct cpumask *mask = perf_scope_cpumask(pmu->scope);
+
+ if (mask)
+ return cpumap_print_to_pagebuf(true, buf, mask);
+ return 0;
+}
+
+static DEVICE_ATTR_RO(cpumask);
+
static struct attribute *pmu_dev_attrs[] = {
&dev_attr_type.attr,
&dev_attr_perf_event_mux_interval_ms.attr,
&dev_attr_nr_addr_filters.attr,
+ &dev_attr_cpumask.attr,
NULL,
};
@@ -11512,6 +11567,10 @@ static umode_t pmu_dev_is_visible(struct
if (n == 2 && !pmu->nr_addr_filters)
return 0;
+ /* cpumask */
+ if (n == 3 && pmu->scope == PERF_PMU_SCOPE_NONE)
+ return 0;
+
return a->mode;
}
@@ -11596,6 +11655,11 @@ int perf_pmu_register(struct pmu *pmu, c
goto free_pdc;
}
+ if (WARN_ONCE(pmu->scope >= PERF_PMU_MAX_SCOPE, "Can not register a pmu with an invalid scope.\n")) {
+ ret = -EINVAL;
+ goto free_pdc;
+ }
+
pmu->name = name;
if (type >= 0)
@@ -11750,6 +11814,22 @@ static int perf_try_init_event(struct pm
event_has_any_exclude_flag(event))
ret = -EINVAL;
+ if (pmu->scope != PERF_PMU_SCOPE_NONE && event->cpu >= 0) {
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(pmu->scope, event->cpu);
+ struct cpumask *pmu_cpumask = perf_scope_cpumask(pmu->scope);
+ int cpu;
+
+ if (pmu_cpumask && cpumask) {
+ cpu = cpumask_any_and(pmu_cpumask, cpumask);
+ if (cpu >= nr_cpu_ids)
+ ret = -ENODEV;
+ else
+ event->cpu = cpu;
+ } else {
+ ret = -ENODEV;
+ }
+ }
+
if (ret && event->destroy)
event->destroy(event);
}
@@ -13713,6 +13793,12 @@ static void __init perf_event_init_all_c
int cpu;
zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_core_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_die_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_cluster_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_pkg_mask, GFP_KERNEL);
+ zalloc_cpumask_var(&perf_online_sys_mask, GFP_KERNEL);
+
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
@@ -13762,6 +13848,40 @@ static void __perf_event_exit_context(vo
raw_spin_unlock(&ctx->lock);
}
+static void perf_event_clear_cpumask(unsigned int cpu)
+{
+ int target[PERF_PMU_MAX_SCOPE];
+ unsigned int scope;
+ struct pmu *pmu;
+
+ cpumask_clear_cpu(cpu, perf_online_mask);
+
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(scope, cpu);
+ struct cpumask *pmu_cpumask = perf_scope_cpumask(scope);
+
+ target[scope] = -1;
+ if (WARN_ON_ONCE(!pmu_cpumask || !cpumask))
+ continue;
+
+ if (!cpumask_test_and_clear_cpu(cpu, pmu_cpumask))
+ continue;
+ target[scope] = cpumask_any_but(cpumask, cpu);
+ if (target[scope] < nr_cpu_ids)
+ cpumask_set_cpu(target[scope], pmu_cpumask);
+ }
+
+ /* migrate */
+ list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
+ if (pmu->scope == PERF_PMU_SCOPE_NONE ||
+ WARN_ON_ONCE(pmu->scope >= PERF_PMU_MAX_SCOPE))
+ continue;
+
+ if (target[pmu->scope] >= 0 && target[pmu->scope] < nr_cpu_ids)
+ perf_pmu_migrate_context(pmu, cpu, target[pmu->scope]);
+ }
+}
+
static void perf_event_exit_cpu_context(int cpu)
{
struct perf_cpu_context *cpuctx;
@@ -13769,6 +13889,11 @@ static void perf_event_exit_cpu_context(
// XXX simplify cpuctx->online
mutex_lock(&pmus_lock);
+ /*
+ * Clear the cpumasks, and migrate to other CPUs if possible.
+ * Must be invoked before the __perf_event_exit_context.
+ */
+ perf_event_clear_cpumask(cpu);
cpuctx = per_cpu_ptr(&perf_cpu_context, cpu);
ctx = &cpuctx->ctx;
@@ -13776,7 +13901,6 @@ static void perf_event_exit_cpu_context(
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
cpuctx->online = 0;
mutex_unlock(&ctx->mutex);
- cpumask_clear_cpu(cpu, perf_online_mask);
mutex_unlock(&pmus_lock);
}
#else
@@ -13785,6 +13909,42 @@ static void perf_event_exit_cpu_context(
#endif
+static void perf_event_setup_cpumask(unsigned int cpu)
+{
+ struct cpumask *pmu_cpumask;
+ unsigned int scope;
+
+ cpumask_set_cpu(cpu, perf_online_mask);
+
+ /*
+ * Early boot stage, the cpumask hasn't been set yet.
+ * The perf_online_<domain>_masks include the first CPU of each domain.
+ * Always unconditionally set the boot CPU for the perf_online_<domain>_masks.
+ */
+ if (!topology_sibling_cpumask(cpu)) {
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
+ pmu_cpumask = perf_scope_cpumask(scope);
+ if (WARN_ON_ONCE(!pmu_cpumask))
+ continue;
+ cpumask_set_cpu(cpu, pmu_cpumask);
+ }
+ return;
+ }
+
+ for (scope = PERF_PMU_SCOPE_NONE + 1; scope < PERF_PMU_MAX_SCOPE; scope++) {
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(scope, cpu);
+
+ pmu_cpumask = perf_scope_cpumask(scope);
+
+ if (WARN_ON_ONCE(!pmu_cpumask || !cpumask))
+ continue;
+
+ if (!cpumask_empty(cpumask) &&
+ cpumask_any_and(pmu_cpumask, cpumask) >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, pmu_cpumask);
+ }
+}
+
int perf_event_init_cpu(unsigned int cpu)
{
struct perf_cpu_context *cpuctx;
@@ -13793,7 +13953,7 @@ int perf_event_init_cpu(unsigned int cpu
perf_swevent_init_cpu(cpu);
mutex_lock(&pmus_lock);
- cpumask_set_cpu(cpu, perf_online_mask);
+ perf_event_setup_cpumask(cpu);
cpuctx = per_cpu_ptr(&perf_cpu_context, cpu);
ctx = &cpuctx->ctx;

View File

@@ -0,0 +1,87 @@
From 997fe7115fcb9392f31aa07f407675194452dc0a Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Thu, 10 Oct 2024 07:26:03 -0700
Subject: perf/x86/rapl: Move the pmu allocation out of CPU hotplug
There is extra code in the CPU hotplug function to allocate rapl pmus,
which makes the generic PMU hotplug support hard to apply.
As long as the rapl pmus can be allocated upfront for each die/socket,
the code doesn't need to be implemented in the CPU hotplug function.
Move the code to init_rapl_pmus() and allocate a PMU for each
possible die/socket.
Tested-by: Oliver Sang <oliver.sang@intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 44 ++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -602,19 +602,8 @@ static int rapl_cpu_online(unsigned int
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
- if (!pmu) {
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
- return -ENOMEM;
-
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
- rapl_hrtimer_init(pmu);
-
- rapl_pmus->pmus[rapl_pmu_idx] = pmu;
- }
+ if (!pmu)
+ return -ENOMEM;
/*
* Check if there is an online cpu in the package which collects rapl
@@ -707,6 +696,32 @@ static const struct attribute_group *rap
NULL,
};
+static int __init init_rapl_pmu(void)
+{
+ struct rapl_pmu *pmu;
+ int idx;
+
+ for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
+ pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ goto free;
+
+ raw_spin_lock_init(&pmu->lock);
+ INIT_LIST_HEAD(&pmu->active_list);
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(pmu);
+
+ rapl_pmus->pmus[idx] = pmu;
+ }
+
+ return 0;
+free:
+ for (; idx > 0; idx--)
+ kfree(rapl_pmus->pmus[idx - 1]);
+ return -ENOMEM;
+}
+
static int __init init_rapl_pmus(void)
{
int nr_rapl_pmu = topology_max_packages();
@@ -730,7 +745,8 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
- return 0;
+
+ return init_rapl_pmu();
}
static struct rapl_model model_snb = {

View File

@@ -1,71 +0,0 @@
From 8c7eb17e722a6a45c4436e5debb9336089b21d9b Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:38 -0700
Subject: perf: Add PERF_EV_CAP_READ_SCOPE
Usually, an event can be read from any CPU of the scope. It doesn't need
to be read from the advertised CPU.
Add a new event cap, PERF_EV_CAP_READ_SCOPE. An event of a PMU with
scope can be read from any active CPU in the scope.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
include/linux/perf_event.h | 3 +++
kernel/events/core.c | 14 +++++++++++---
2 files changed, 14 insertions(+), 3 deletions(-)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -633,10 +633,13 @@ typedef void (*perf_overflow_handler_t)(
* PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
* cannot be a group leader. If an event with this flag is detached from the
* group it is scheduled out and moved into an unrecoverable ERROR state.
+ * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
+ * PMU scope where it is active.
*/
#define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
#define PERF_EV_CAP_SIBLING BIT(2)
+#define PERF_EV_CAP_READ_SCOPE BIT(3)
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4477,16 +4477,24 @@ struct perf_read_data {
int ret;
};
+static inline const struct cpumask *perf_scope_cpu_topology_cpumask(unsigned int scope, int cpu);
+
static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
{
+ int local_cpu = smp_processor_id();
u16 local_pkg, event_pkg;
if ((unsigned)event_cpu >= nr_cpu_ids)
return event_cpu;
- if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
- int local_cpu = smp_processor_id();
+ if (event->group_caps & PERF_EV_CAP_READ_SCOPE) {
+ const struct cpumask *cpumask = perf_scope_cpu_topology_cpumask(event->pmu->scope, event_cpu);
+
+ if (cpumask && cpumask_test_cpu(local_cpu, cpumask))
+ return local_cpu;
+ }
+ if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
event_pkg = topology_physical_package_id(event_cpu);
local_pkg = topology_physical_package_id(local_cpu);
@@ -11824,7 +11832,7 @@ static int perf_try_init_event(struct pm
if (cpu >= nr_cpu_ids)
ret = -ENODEV;
else
- event->cpu = cpu;
+ event->event_caps |= PERF_EV_CAP_READ_SCOPE;
} else {
ret = -ENODEV;
}

View File

@@ -1,6 +1,6 @@
From 7b4f6ba1b1dc5f3120652bcb5921a697d5167bff Mon Sep 17 00:00:00 2001
From 423bd96e0e4522b19e2dca083b70ebbebb639acd Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:43 -0700
Date: Thu, 10 Oct 2024 07:26:04 -0700
Subject: perf/x86/rapl: Clean up cpumask and hotplug
The rapl pmu is die scope, which is supported by the generic perf_event
@@ -9,16 +9,17 @@ subsystem now.
Set the scope for the rapl PMU and remove all the cpumask and hotplug
codes.
Tested-by: Oliver Sang <oliver.sang@intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 80 +-------------------------------------
arch/x86/events/rapl.c | 90 +++-----------------------------------
include/linux/cpuhotplug.h | 1 -
2 files changed, 2 insertions(+), 79 deletions(-)
2 files changed, 6 insertions(+), 85 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -135,7 +135,6 @@ struct rapl_model {
@@ -148,7 +148,6 @@ struct rapl_model {
/* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus;
@@ -26,7 +27,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
@@ -340,8 +339,6 @@ static int rapl_pmu_event_init(struct pe
@@ -369,8 +368,6 @@ static int rapl_pmu_event_init(struct pe
if (event->cpu < 0)
return -EINVAL;
@@ -35,7 +36,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL;
@@ -360,7 +357,6 @@ static int rapl_pmu_event_init(struct pe
@@ -389,7 +386,6 @@ static int rapl_pmu_event_init(struct pe
pmu = cpu_to_rapl_pmu(event->cpu);
if (!pmu)
return -EINVAL;
@@ -43,7 +44,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
event->pmu_private = pmu;
event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
@@ -374,23 +370,6 @@ static void rapl_pmu_event_read(struct p
@@ -403,23 +399,6 @@ static void rapl_pmu_event_read(struct p
rapl_event_update(event);
}
@@ -67,7 +68,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
@@ -438,7 +417,6 @@ static struct attribute_group rapl_pmu_f
@@ -467,7 +446,6 @@ static struct attribute_group rapl_pmu_f
};
static const struct attribute_group *rapl_attr_groups[] = {
@@ -75,7 +76,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
&rapl_pmu_format_group,
&rapl_pmu_events_group,
NULL,
@@ -541,49 +519,6 @@ static struct perf_msr amd_rapl_msrs[] =
@@ -570,54 +548,6 @@ static struct perf_msr amd_rapl_msrs[] =
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
@@ -90,7 +91,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
-
- pmu->cpu = -1;
- /* Find a new cpu to collect rapl events */
- target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
- target = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu);
-
- /* Migrate rapl events to the new target */
- if (target < nr_cpu_ids) {
@@ -103,6 +104,11 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
-
-static int rapl_cpu_online(unsigned int cpu)
-{
- s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu);
- if (rapl_pmu_idx < 0) {
- pr_err("topology_logical_(package/die)_id() returned a negative value");
- return -EINVAL;
- }
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
- int target;
-
@@ -113,7 +119,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
- * Check if there is an online cpu in the package which collects rapl
- * events already.
- */
- target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
- target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu));
- if (target < nr_cpu_ids)
- return 0;
-
@@ -125,15 +131,29 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
static int rapl_check_hw_unit(struct rapl_model *rm)
{
u64 msr_rapl_power_unit_bits;
@@ -707,6 +642,7 @@ static int __init init_rapl_pmus(void)
@@ -725,9 +655,12 @@ free:
static int __init init_rapl_pmus(void)
{
int nr_rapl_pmu = topology_max_packages();
+ int rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
- if (!rapl_pmu_is_pkg_scope())
+ if (!rapl_pmu_is_pkg_scope()) {
nr_rapl_pmu *= topology_max_dies_per_package();
+ rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
+ }
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
@@ -743,6 +676,7 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
+ rapl_pmus->pmu.scope = rapl_pmu_scope;
rapl_pmus->pmu.module = THIS_MODULE;
+ rapl_pmus->pmu.scope = PERF_PMU_SCOPE_DIE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
init_rapl_pmu();
@@ -857,24 +793,13 @@ static int __init rapl_pmu_init(void)
@@ -892,24 +826,13 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
@@ -159,7 +179,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus();
@@ -884,7 +809,6 @@ module_init(rapl_pmu_init);
@@ -919,7 +842,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
@@ -169,7 +189,7 @@ Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
}
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -207,7 +207,6 @@ enum cpuhp_state {
@@ -208,7 +208,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,

View File

@@ -1,286 +0,0 @@
From 09c1529eb102b486220c35546f2663ca858a2943 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:39 -0700
Subject: perf/x86/intel/cstate: Clean up cpumask and hotplug
There are three cstate PMUs with different scopes: core, die, and module.
These scopes are supported by the generic perf_event subsystem now.
Set the scope for each PMU and remove all the cpumask and hotplug code.
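For readability, the scope assignments made further down in this patch boil down to the following (a condensed restatement of the diff, not extra code):

cstate_core_pmu.scope   = PERF_PMU_SCOPE_CORE;    /* per-core residency counters */
cstate_pkg_pmu.scope    = PERF_PMU_SCOPE_PKG;     /* switched to PERF_PMU_SCOPE_DIE at
                                                     init time on multi-die parts (CLX-AP) */
cstate_module_pmu.scope = PERF_PMU_SCOPE_CLUSTER; /* "module" events follow the cluster
                                                     topology mask */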
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
arch/x86/events/intel/cstate.c | 142 ++-------------------------------
include/linux/cpuhotplug.h | 2 -
2 files changed, 5 insertions(+), 139 deletions(-)
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -128,10 +128,6 @@ static ssize_t __cstate_##_var##_show(st
static struct device_attribute format_attr_##_var = \
__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf);
-
/* Model -> events mapping */
struct cstate_model {
unsigned long core_events;
@@ -206,22 +202,9 @@ static struct attribute_group cstate_for
.attrs = cstate_format_attrs,
};
-static cpumask_t cstate_core_cpu_mask;
-static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
-
-static struct attribute *cstate_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group cpumask_attr_group = {
- .attrs = cstate_cpumask_attrs,
-};
-
static const struct attribute_group *cstate_attr_groups[] = {
&cstate_events_attr_group,
&cstate_format_attr_group,
- &cpumask_attr_group,
NULL,
};
@@ -269,8 +252,6 @@ static struct perf_msr pkg_msr[] = {
[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr },
};
-static cpumask_t cstate_pkg_cpu_mask;
-
/* cstate_module PMU */
static struct pmu cstate_module_pmu;
static bool has_cstate_module;
@@ -291,28 +272,9 @@ static struct perf_msr module_msr[] = {
[PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr },
};
-static cpumask_t cstate_module_cpu_mask;
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct pmu *pmu = dev_get_drvdata(dev);
-
- if (pmu == &cstate_core_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
- else if (pmu == &cstate_pkg_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
- else if (pmu == &cstate_module_pmu)
- return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask);
- else
- return 0;
-}
-
static int cstate_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
- int cpu;
if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -331,20 +293,13 @@ static int cstate_pmu_event_init(struct
if (!(core_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_core_cpu_mask,
- topology_sibling_cpumask(event->cpu));
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
if (!(pkg_msr_mask & (1 << cfg)))
return -EINVAL;
-
- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
-
event->hw.event_base = pkg_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_die_cpumask(event->cpu));
} else if (event->pmu == &cstate_module_pmu) {
if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX)
return -EINVAL;
@@ -352,16 +307,10 @@ static int cstate_pmu_event_init(struct
if (!(module_msr_mask & (1 << cfg)))
return -EINVAL;
event->hw.event_base = module_msr[cfg].msr;
- cpu = cpumask_any_and(&cstate_module_cpu_mask,
- topology_cluster_cpumask(event->cpu));
} else {
return -ENOENT;
}
- if (cpu >= nr_cpu_ids)
- return -ENODEV;
-
- event->cpu = cpu;
event->hw.config = cfg;
event->hw.idx = -1;
return 0;
@@ -412,84 +361,6 @@ static int cstate_pmu_event_add(struct p
return 0;
}
-/*
- * Check if exiting cpu is the designated reader. If so migrate the
- * events when there is a valid target available
- */
-static int cstate_cpu_exit(unsigned int cpu)
-{
- unsigned int target;
-
- if (has_cstate_core &&
- cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
-
- target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_core_cpu_mask);
- perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
- }
- }
-
- if (has_cstate_pkg &&
- cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
-
- target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
- perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
- }
- }
-
- if (has_cstate_module &&
- cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) {
-
- target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu);
- /* Migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &cstate_module_cpu_mask);
- perf_pmu_migrate_context(&cstate_module_pmu, cpu, target);
- }
- }
- return 0;
-}
-
-static int cstate_cpu_init(unsigned int cpu)
-{
- unsigned int target;
-
- /*
- * If this is the first online thread of that core, set it in
- * the core cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_core_cpu_mask,
- topology_sibling_cpumask(cpu));
-
- if (has_cstate_core && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-
- /*
- * If this is the first online thread of that package, set it
- * in the package cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_die_cpumask(cpu));
- if (has_cstate_pkg && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-
- /*
- * If this is the first online thread of that cluster, set it
- * in the cluster cpu mask as the designated reader.
- */
- target = cpumask_any_and(&cstate_module_cpu_mask,
- topology_cluster_cpumask(cpu));
- if (has_cstate_module && target >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_module_cpu_mask);
-
- return 0;
-}
-
static const struct attribute_group *core_attr_update[] = {
&group_cstate_core_c1,
&group_cstate_core_c3,
@@ -526,6 +397,7 @@ static struct pmu cstate_core_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_CORE,
.module = THIS_MODULE,
};
@@ -541,6 +413,7 @@ static struct pmu cstate_pkg_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_PKG,
.module = THIS_MODULE,
};
@@ -556,6 +429,7 @@ static struct pmu cstate_module_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
+ .scope = PERF_PMU_SCOPE_CLUSTER,
.module = THIS_MODULE,
};
@@ -810,9 +684,6 @@ static int __init cstate_probe(const str
static inline void cstate_cleanup(void)
{
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
-
if (has_cstate_core)
perf_pmu_unregister(&cstate_core_pmu);
@@ -827,11 +698,6 @@ static int __init cstate_init(void)
{
int err;
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
- "perf/x86/cstate:starting", cstate_cpu_init, NULL);
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
- "perf/x86/cstate:online", NULL, cstate_cpu_exit);
-
if (has_cstate_core) {
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
if (err) {
@@ -844,6 +710,8 @@ static int __init cstate_init(void)
if (has_cstate_pkg) {
if (topology_max_dies_per_package() > 1) {
+ /* CLX-AP is multi-die and the cstate is die-scope */
+ cstate_pkg_pmu.scope = PERF_PMU_SCOPE_DIE;
err = perf_pmu_register(&cstate_pkg_pmu,
"cstate_die", -1);
} else {
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -152,7 +152,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
CPUHP_AP_PERF_X86_STARTING,
CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
- CPUHP_AP_PERF_X86_CSTATE_STARTING,
CPUHP_AP_PERF_XTENSA_STARTING,
CPUHP_AP_ARM_VFP_STARTING,
CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
@@ -209,7 +208,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
CPUHP_AP_PERF_X86_RAPL_ONLINE,
- CPUHP_AP_PERF_X86_CSTATE_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,

View File

@@ -0,0 +1,40 @@
From 9e3190a0f75324a7dcb66da336584918243b2c77 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 15 Nov 2024 06:07:57 +0000
Subject: perf/x86/rapl: Remove the unused get_rapl_pmu_cpumask() function
commit 9e9af8bbb5f9 ("perf/x86/rapl: Clean up cpumask and hotplug")
removes the cpumask handling from rapl. After that, the
get_rapl_pmu_cpumask() function is no longer needed, so remove it.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
---
arch/x86/events/rapl.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -153,7 +153,7 @@ static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
/*
- * Helper functions to get the correct topology macros according to the
+ * Helper function to get the correct topology id according to the
* RAPL PMU scope.
*/
static inline unsigned int get_rapl_pmu_idx(int cpu)
@@ -162,12 +162,6 @@ static inline unsigned int get_rapl_pmu_
topology_logical_die_id(cpu);
}
-static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu)
-{
- return rapl_pmu_is_pkg_scope() ? topology_core_cpumask(cpu) :
- topology_die_cpumask(cpu);
-}
-
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);

View File

@@ -1,188 +0,0 @@
From f91da33af8295b4b3d73a2083225f69e1d5ff301 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:40 -0700
Subject: iommu/vt-d: Clean up cpumask and hotplug for perfmon
The iommu PMU has system-wide scope, which is supported by the generic
perf_event subsystem now.
Set the scope for the iommu PMU and remove all the cpumask and hotplug
code.
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Will Deacon <will@kernel.org>
Cc: iommu@lists.linux.dev
---
drivers/iommu/intel/iommu.h | 2 -
drivers/iommu/intel/perfmon.c | 111 +---------------------------------
2 files changed, 2 insertions(+), 111 deletions(-)
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -687,8 +687,6 @@ struct iommu_pmu {
DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX);
struct perf_event *event_list[IOMMU_PMU_IDX_MAX];
unsigned char irq_name[16];
- struct hlist_node cpuhp_node;
- int cpu;
};
#define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED)
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -34,28 +34,9 @@ static struct attribute_group iommu_pmu_
.attrs = attrs_empty,
};
-static cpumask_t iommu_pmu_cpu_mask;
-
-static ssize_t
-cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
-}
-static DEVICE_ATTR_RO(cpumask);
-
-static struct attribute *iommu_pmu_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL
-};
-
-static struct attribute_group iommu_pmu_cpumask_attr_group = {
- .attrs = iommu_pmu_cpumask_attrs,
-};
-
static const struct attribute_group *iommu_pmu_attr_groups[] = {
&iommu_pmu_format_attr_group,
&iommu_pmu_events_attr_group,
- &iommu_pmu_cpumask_attr_group,
NULL
};
@@ -565,6 +546,7 @@ static int __iommu_pmu_register(struct i
iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups;
iommu_pmu->pmu.attr_update = iommu_pmu_attr_update;
iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+ iommu_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE;
iommu_pmu->pmu.module = THIS_MODULE;
return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
@@ -773,89 +755,6 @@ static void iommu_pmu_unset_interrupt(st
iommu->perf_irq = 0;
}
-static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
-{
- struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
-
- if (cpumask_empty(&iommu_pmu_cpu_mask))
- cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
-
- if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
- iommu_pmu->cpu = cpu;
-
- return 0;
-}
-
-static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
-{
- struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
- int target = cpumask_first(&iommu_pmu_cpu_mask);
-
- /*
- * The iommu_pmu_cpu_mask has been updated when offline the CPU
- * for the first iommu_pmu. Migrate the other iommu_pmu to the
- * new target.
- */
- if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
- perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
- iommu_pmu->cpu = target;
- return 0;
- }
-
- if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
- return 0;
-
- target = cpumask_any_but(cpu_online_mask, cpu);
-
- if (target < nr_cpu_ids)
- cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
- else
- return 0;
-
- perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
- iommu_pmu->cpu = target;
-
- return 0;
-}
-
-static int nr_iommu_pmu;
-static enum cpuhp_state iommu_cpuhp_slot;
-
-static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
-{
- int ret;
-
- if (!nr_iommu_pmu) {
- ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
- "driver/iommu/intel/perfmon:online",
- iommu_pmu_cpu_online,
- iommu_pmu_cpu_offline);
- if (ret < 0)
- return ret;
- iommu_cpuhp_slot = ret;
- }
-
- ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
- if (ret) {
- if (!nr_iommu_pmu)
- cpuhp_remove_multi_state(iommu_cpuhp_slot);
- return ret;
- }
- nr_iommu_pmu++;
-
- return 0;
-}
-
-static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
-{
- cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
-
- if (--nr_iommu_pmu)
- return;
-
- cpuhp_remove_multi_state(iommu_cpuhp_slot);
-}
-
void iommu_pmu_register(struct intel_iommu *iommu)
{
struct iommu_pmu *iommu_pmu = iommu->pmu;
@@ -866,17 +765,12 @@ void iommu_pmu_register(struct intel_iom
if (__iommu_pmu_register(iommu))
goto err;
- if (iommu_pmu_cpuhp_setup(iommu_pmu))
- goto unregister;
-
/* Set interrupt for overflow */
if (iommu_pmu_set_interrupt(iommu))
- goto cpuhp_free;
+ goto unregister;
return;
-cpuhp_free:
- iommu_pmu_cpuhp_free(iommu_pmu);
unregister:
perf_pmu_unregister(&iommu_pmu->pmu);
err:
@@ -892,6 +786,5 @@ void iommu_pmu_unregister(struct intel_i
return;
iommu_pmu_unset_interrupt(iommu);
- iommu_pmu_cpuhp_free(iommu_pmu);
perf_pmu_unregister(&iommu_pmu->pmu);
}

View File

@@ -1,19 +1,21 @@
From 9439067951f4d857272836b35812af26650d9c16 Mon Sep 17 00:00:00 2001
From 947046055803695b05b1021893860c50412a8d7b Mon Sep 17 00:00:00 2001
From: K Prateek Nayak <kprateek.nayak@amd.com>
Date: Fri, 13 Sep 2024 15:21:41 +0000
Date: Fri, 15 Nov 2024 06:07:58 +0000
Subject: x86/topology: Introduce topology_logical_core_id()
On x86, topology_core_id() returns a unique core ID within the PKG
domain. Looking at match_smt() suggests that a core ID just needs to be
unique within a LLC domain. For use cases such as the per-core RAPL PMU,
unique within a LLC domain. For use cases such as the core RAPL PMU,
there exists a need for a unique core ID across the entire system with
multiple PKG domains. Introduce topology_logical_core_id() to derive a
unique core ID across the system.
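For illustration only (hypothetical usage; the per-core RAPL code is not part of this patch, and "core_state"/"MAX_LOGICAL_CORES"/"my_core_state" are made-up names), a driver keeping one structure per physical core across the whole system could index it with the new ID:

/* Hypothetical sketch: a system-wide per-core lookup table. */
static struct my_core_state *core_state[MAX_LOGICAL_CORES];

static struct my_core_state *cpu_to_core_state(unsigned int cpu)
{
        /* Unique across all packages, unlike topology_core_id(). */
        return core_state[topology_logical_core_id(cpu)];
}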
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
---
Documentation/arch/x86/topology.rst | 4 ++++
arch/x86/include/asm/processor.h | 1 +
@@ -47,7 +49,7 @@ Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
u32 amd_node_id;
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -137,6 +137,7 @@ extern const struct cpumask *cpu_cluster
@@ -143,6 +143,7 @@ extern const struct cpumask *cpu_cluster
#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id)
#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id)
@@ -57,8 +59,8 @@ Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
#define topology_ppin(cpu) (cpu_data(cpu).ppin)
--- a/arch/x86/kernel/cpu/debugfs.c
+++ b/arch/x86/kernel/cpu/debugfs.c
@@ -24,6 +24,7 @@ static int cpu_debug_show(struct seq_fil
seq_printf(m, "core_id: %u\n", c->topo.core_id);
@@ -25,6 +25,7 @@ static int cpu_debug_show(struct seq_fil
seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c));
seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id);
seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id);
+ seq_printf(m, "logical_core_id: %u\n", c->topo.logical_core_id);
@@ -67,7 +69,7 @@ Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id);
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -151,6 +151,7 @@ static void topo_set_ids(struct topo_sca
@@ -185,6 +185,7 @@ static void topo_set_ids(struct topo_sca
if (!early) {
c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);

View File

@@ -1,238 +0,0 @@
From 76278bd3946d618ead2d9cc22612a75a4ab99ace Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:41 -0700
Subject: dmaengine: idxd: Clean up cpumask and hotplug for perfmon
The idxd PMU has system-wide scope, which is supported by the generic
perf_event subsystem now.
Set the scope for the idxd PMU and remove all the cpumask and hotplug
code.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Vinod Koul <vkoul@kernel.org>
Cc: dmaengine@vger.kernel.org
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
---
drivers/dma/idxd/idxd.h | 7 ---
drivers/dma/idxd/init.c | 3 --
drivers/dma/idxd/perfmon.c | 98 +-------------------------------------
3 files changed, 1 insertion(+), 107 deletions(-)
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -124,7 +124,6 @@ struct idxd_pmu {
struct pmu pmu;
char name[IDXD_NAME_SIZE];
- int cpu;
int n_counters;
int counter_width;
@@ -135,8 +134,6 @@ struct idxd_pmu {
unsigned long supported_filters;
int n_filters;
-
- struct hlist_node cpuhp_node;
};
#define IDXD_MAX_PRIORITY 0xf
@@ -803,14 +800,10 @@ void idxd_user_counter_increment(struct
int perfmon_pmu_init(struct idxd_device *idxd);
void perfmon_pmu_remove(struct idxd_device *idxd);
void perfmon_counter_overflow(struct idxd_device *idxd);
-void perfmon_init(void);
-void perfmon_exit(void);
#else
static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; }
static inline void perfmon_pmu_remove(struct idxd_device *idxd) {}
static inline void perfmon_counter_overflow(struct idxd_device *idxd) {}
-static inline void perfmon_init(void) {}
-static inline void perfmon_exit(void) {}
#endif
/* debugfs */
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -878,8 +878,6 @@ static int __init idxd_init_module(void)
else
support_enqcmd = true;
- perfmon_init();
-
err = idxd_driver_register(&idxd_drv);
if (err < 0)
goto err_idxd_driver_register;
@@ -928,7 +926,6 @@ static void __exit idxd_exit_module(void
idxd_driver_unregister(&idxd_drv);
pci_unregister_driver(&idxd_pci_driver);
idxd_cdev_remove();
- perfmon_exit();
idxd_remove_debugfs();
}
module_exit(idxd_exit_module);
--- a/drivers/dma/idxd/perfmon.c
+++ b/drivers/dma/idxd/perfmon.c
@@ -6,29 +6,6 @@
#include "idxd.h"
#include "perfmon.h"
-static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
- char *buf);
-
-static cpumask_t perfmon_dsa_cpu_mask;
-static bool cpuhp_set_up;
-static enum cpuhp_state cpuhp_slot;
-
-/*
- * perf userspace reads this attribute to determine which cpus to open
- * counters on. It's connected to perfmon_dsa_cpu_mask, which is
- * maintained by the cpu hotplug handlers.
- */
-static DEVICE_ATTR_RO(cpumask);
-
-static struct attribute *perfmon_cpumask_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group cpumask_attr_group = {
- .attrs = perfmon_cpumask_attrs,
-};
-
/*
* These attributes specify the bits in the config word that the perf
* syscall uses to pass the event ids and categories to perfmon.
@@ -67,16 +44,9 @@ static struct attribute_group perfmon_fo
static const struct attribute_group *perfmon_attr_groups[] = {
&perfmon_format_attr_group,
- &cpumask_attr_group,
NULL,
};
-static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask);
-}
-
static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event)
{
return &idxd_pmu->pmu == event->pmu;
@@ -217,7 +187,6 @@ static int perfmon_pmu_event_init(struct
return -EINVAL;
event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd));
- event->cpu = idxd->idxd_pmu->cpu;
event->hw.config = event->attr.config;
if (event->group_leader != event)
@@ -488,6 +457,7 @@ static void idxd_pmu_init(struct idxd_pm
idxd_pmu->pmu.stop = perfmon_pmu_event_stop;
idxd_pmu->pmu.read = perfmon_pmu_event_update;
idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+ idxd_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE;
idxd_pmu->pmu.module = THIS_MODULE;
}
@@ -496,47 +466,11 @@ void perfmon_pmu_remove(struct idxd_devi
if (!idxd->idxd_pmu)
return;
- cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node);
perf_pmu_unregister(&idxd->idxd_pmu->pmu);
kfree(idxd->idxd_pmu);
idxd->idxd_pmu = NULL;
}
-static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node)
-{
- struct idxd_pmu *idxd_pmu;
-
- idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
-
- /* select the first online CPU as the designated reader */
- if (cpumask_empty(&perfmon_dsa_cpu_mask)) {
- cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask);
- idxd_pmu->cpu = cpu;
- }
-
- return 0;
-}
-
-static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node)
-{
- struct idxd_pmu *idxd_pmu;
- unsigned int target;
-
- idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
-
- if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask))
- return 0;
-
- target = cpumask_any_but(cpu_online_mask, cpu);
- /* migrate events if there is a valid target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &perfmon_dsa_cpu_mask);
- perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);
- }
-
- return 0;
-}
-
int perfmon_pmu_init(struct idxd_device *idxd)
{
union idxd_perfcap perfcap;
@@ -544,12 +478,6 @@ int perfmon_pmu_init(struct idxd_device
int rc = -ENODEV;
/*
- * perfmon module initialization failed, nothing to do
- */
- if (!cpuhp_set_up)
- return -ENODEV;
-
- /*
* If perfmon_offset or num_counters is 0, it means perfmon is
* not supported on this hardware.
*/
@@ -624,11 +552,6 @@ int perfmon_pmu_init(struct idxd_device
if (rc)
goto free;
- rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node);
- if (rc) {
- perf_pmu_unregister(&idxd->idxd_pmu->pmu);
- goto free;
- }
out:
return rc;
free:
@@ -637,22 +560,3 @@ free:
goto out;
}
-
-void __init perfmon_init(void)
-{
- int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
- "driver/dma/idxd/perf:online",
- perf_event_cpu_online,
- perf_event_cpu_offline);
- if (WARN_ON(rc < 0))
- return;
-
- cpuhp_slot = rc;
- cpuhp_set_up = true;
-}
-
-void __exit perfmon_exit(void)
-{
- if (cpuhp_set_up)
- cpuhp_remove_multi_state(cpuhp_slot);
-}

View File

@@ -0,0 +1,69 @@
From 8fd2da09b0c534e05a7b12eb578afae27ad20f7d Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 15 Nov 2024 06:07:59 +0000
Subject: perf/x86/rapl: Remove the cpu_to_rapl_pmu() function
Prepare for the addition of RAPL core energy counter support.
After that, one CPU might be mapped to more than one rapl_pmu
(a package/die one and a core one), so remove the cpu_to_rapl_pmu()
function.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
---
arch/x86/events/rapl.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -157,22 +157,15 @@ static struct perf_msr *rapl_msrs;
* RAPL PMU scope.
*/
static inline unsigned int get_rapl_pmu_idx(int cpu)
-{
+{ /*
+ * Returns unsigned int, which converts the '-1' return value
+ * (for non-existent mappings in topology map) to UINT_MAX, so
+ * the error check in the caller is simplified.
+ */
return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
topology_logical_die_id(cpu);
}
-static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
-{
- unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
-
- /*
- * The unsigned check also catches the '-1' return value for non
- * existent mappings in the topology map.
- */
- return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL;
-}
-
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
@@ -350,6 +343,7 @@ static int rapl_pmu_event_init(struct pe
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
int bit, ret = 0;
struct rapl_pmu *pmu;
+ unsigned int rapl_pmu_idx;
/* only look at RAPL events */
if (event->attr.type != rapl_pmus->pmu.type)
@@ -376,8 +370,12 @@ static int rapl_pmu_event_init(struct pe
if (event->attr.sample_period) /* no sampling */
return -EINVAL;
+ rapl_pmu_idx = get_rapl_pmu_idx(event->cpu);
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
+ return -EINVAL;
+
/* must be done before validate_group */
- pmu = cpu_to_rapl_pmu(event->cpu);
+ pmu = rapl_pmus->pmus[rapl_pmu_idx];
if (!pmu)
return -EINVAL;
event->pmu_private = pmu;

View File

@@ -1,84 +0,0 @@
From fb0a3b5932882f02ed42fcaa6db73aba3eafd6d7 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 2 Aug 2024 08:16:42 -0700
Subject: perf/x86/rapl: Move the pmu allocation out of CPU hotplug
The rapl pmu just needs to be allocated once. It doesn't matter whether
it is allocated at each CPU hotplug or in the global init_rapl_pmus().
Move the pmu allocation to init_rapl_pmus() so the generic hotplug
support can be applied.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 44 +++++++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 13 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -568,19 +568,8 @@ static int rapl_cpu_online(unsigned int
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
- if (!pmu) {
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
- return -ENOMEM;
-
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
- rapl_hrtimer_init(pmu);
-
- rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
- }
+ if (!pmu)
+ return -ENOMEM;
/*
* Check if there is an online cpu in the package which collects rapl
@@ -673,6 +662,32 @@ static const struct attribute_group *rap
NULL,
};
+static void __init init_rapl_pmu(void)
+{
+ struct rapl_pmu *pmu;
+ int cpu;
+
+ cpus_read_lock();
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ pmu = cpu_to_rapl_pmu(cpu);
+ if (pmu)
+ continue;
+ pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+ if (!pmu)
+ continue;
+ raw_spin_lock_init(&pmu->lock);
+ INIT_LIST_HEAD(&pmu->active_list);
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(pmu);
+
+ rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
+ }
+
+ cpus_read_unlock();
+}
+
static int __init init_rapl_pmus(void)
{
int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
@@ -693,6 +708,9 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.read = rapl_pmu_event_read;
rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+
+ init_rapl_pmu();
+
return 0;
}

View File

@@ -1,6 +1,6 @@
From 07ec9f38cac6eb6e5b0b062ef99e9458ba567de8 Mon Sep 17 00:00:00 2001
From 30e2cd787aeb9cb9c1148e07446aac76765f715e Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:21:43 +0000
Date: Fri, 15 Nov 2024 06:08:00 +0000
Subject: perf/x86/rapl: Rename rapl_pmu variables
Rename struct rapl_pmu variables from "pmu" to "rapl_pmu", to
@@ -15,13 +15,16 @@ Also rename "pmus" member in rapl_pmus struct, for same reason.
No functional change.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
---
arch/x86/events/rapl.c | 93 +++++++++++++++++++++---------------------
1 file changed, 47 insertions(+), 46 deletions(-)
arch/x86/events/rapl.c | 91 +++++++++++++++++++++---------------------
1 file changed, 46 insertions(+), 45 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -116,7 +116,7 @@ struct rapl_pmu {
@@ -129,7 +129,7 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
unsigned int nr_rapl_pmu;
@@ -30,7 +33,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
};
enum rapl_unit_quirk {
@@ -223,34 +223,34 @@ static void rapl_start_hrtimer(struct ra
@@ -227,34 +227,34 @@ static void rapl_start_hrtimer(struct ra
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
{
@@ -74,7 +77,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
struct perf_event *event)
{
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
@@ -258,39 +258,39 @@ static void __rapl_pmu_event_start(struc
@@ -262,39 +262,39 @@ static void __rapl_pmu_event_start(struc
event->hw.state = 0;
@@ -128,7 +131,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
list_del(&event->active_entry);
@@ -308,23 +308,23 @@ static void rapl_pmu_event_stop(struct p
@@ -312,23 +312,23 @@ static void rapl_pmu_event_stop(struct p
hwc->state |= PERF_HES_UPTODATE;
}
@@ -157,16 +160,16 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
return 0;
}
@@ -338,7 +338,7 @@ static int rapl_pmu_event_init(struct pe
@@ -342,7 +342,7 @@ static int rapl_pmu_event_init(struct pe
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
int bit, rapl_pmu_idx, ret = 0;
int bit, ret = 0;
- struct rapl_pmu *pmu;
+ struct rapl_pmu *rapl_pmu;
unsigned int rapl_pmu_idx;
/* only look at RAPL events */
if (event->attr.type != rapl_pmus->pmu.type)
@@ -370,10 +370,11 @@ static int rapl_pmu_event_init(struct pe
@@ -375,10 +375,11 @@ static int rapl_pmu_event_init(struct pe
return -EINVAL;
/* must be done before validate_group */
@@ -181,7 +184,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -600,7 +601,7 @@ static void cleanup_rapl_pmus(void)
@@ -605,7 +606,7 @@ static void cleanup_rapl_pmus(void)
int i;
for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++)
@@ -190,29 +193,21 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
kfree(rapl_pmus);
}
@@ -615,7 +616,7 @@ static const struct attribute_group *rap
@@ -620,27 +621,27 @@ static const struct attribute_group *rap
static void __init init_rapl_pmu(void)
static int __init init_rapl_pmu(void)
{
- struct rapl_pmu *pmu;
+ struct rapl_pmu *rapl_pmu;
int cpu, rapl_pmu_idx;
int idx;
cpus_read_lock();
@@ -625,19 +626,19 @@ static void __init init_rapl_pmu(void)
if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
continue;
- pmu = rapl_pmus->pmus[rapl_pmu_idx];
- if (pmu)
+ rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
+ if (rapl_pmu)
continue;
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
- pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
- if (!pmu)
+ rapl_pmu = kzalloc_node(sizeof(*rapl_pmu), GFP_KERNEL, cpu_to_node(cpu));
+ rapl_pmu = kzalloc(sizeof(*rapl_pmu), GFP_KERNEL);
+ if (!rapl_pmu)
continue;
goto free;
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
@@ -224,13 +219,20 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
+ rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(rapl_pmu);
- rapl_pmus->pmus[rapl_pmu_idx] = pmu;
+ rapl_pmus->rapl_pmu[rapl_pmu_idx] = rapl_pmu;
- rapl_pmus->pmus[idx] = pmu;
+ rapl_pmus->rapl_pmu[idx] = rapl_pmu;
}
cpus_read_unlock();
@@ -653,7 +654,7 @@ static int __init init_rapl_pmus(void)
rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
return 0;
free:
for (; idx > 0; idx--)
- kfree(rapl_pmus->pmus[idx - 1]);
+ kfree(rapl_pmus->rapl_pmu[idx - 1]);
return -ENOMEM;
}
@@ -654,7 +655,7 @@ static int __init init_rapl_pmus(void)
rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
}
- rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);

View File

@@ -1,9 +1,9 @@
From 68614752b9fd6b6bae6f9ab7b02fc28350c5a541 Mon Sep 17 00:00:00 2001
From 0f84d3bd0af2c300fe13d9f0f5131d0747a13f9e Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:47:56 +0000
Date: Fri, 15 Nov 2024 06:08:01 +0000
Subject: perf/x86/rapl: Make rapl_model struct global
Preparation for per-core energy counter support addition for AMD CPUs.
Prepare for the addition of RAPL core energy counter support.
As there will always be just one rapl_model variable on a system, make it
global, to make it easier to access it from any function.
@@ -11,21 +11,24 @@ global, to make it easier to access it from any function.
No functional change.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
---
arch/x86/events/rapl.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -138,6 +138,7 @@ static struct rapl_pmus *rapl_pmus;
@@ -151,6 +151,7 @@ static struct rapl_pmus *rapl_pmus;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
+static struct rapl_model *rapl_model;
/*
* RAPL Package energy counter scope:
@@ -536,18 +537,18 @@ static struct perf_msr amd_rapl_msrs[] =
* Helper function to get the correct topology id according to the
@@ -541,18 +542,18 @@ static struct perf_msr amd_rapl_msrs[] =
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
@@ -47,7 +50,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
/*
* DRAM domain on HSW server and KNL has fixed energy unit which can be
* different than the unit from power unit MSR. See
@@ -798,21 +799,20 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
@@ -797,21 +798,20 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;

View File

@@ -1,20 +1,26 @@
From b10b887510ccb0b6bc7294888982b862703c9c32 Mon Sep 17 00:00:00 2001
From 395cf3513aaf79c95c611ce97ba451eaf6470c44 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:47:57 +0000
Subject: perf/x86/rapl: Add arguments to the cleanup and init functions
Date: Fri, 15 Nov 2024 06:08:02 +0000
Subject: perf/x86/rapl: Add arguments to the init and cleanup functions
Prep for per-core RAPL PMU addition.
Prepare for the addition of RAPL core energy counter support.
Add arguments to the init and cleanup functions, which will help in
initialization and cleaning up of two separate PMUs.
No functional change.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
---
arch/x86/events/rapl.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
arch/x86/events/rapl.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -597,7 +597,7 @@ static void __init rapl_advertise(void)
@@ -602,7 +602,7 @@ static void __init rapl_advertise(void)
}
}
@@ -23,35 +29,32 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
{
int i;
@@ -615,7 +615,7 @@ static const struct attribute_group *rap
@@ -620,7 +620,7 @@ static const struct attribute_group *rap
NULL,
};
-static void __init init_rapl_pmu(void)
+static void __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
-static int __init init_rapl_pmu(void)
+static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
{
struct rapl_pmu *rapl_pmu;
int cpu, rapl_pmu_idx;
@@ -645,20 +645,22 @@ static void __init init_rapl_pmu(void)
cpus_read_unlock();
int idx;
@@ -646,20 +646,20 @@ free:
return -ENOMEM;
}
-static int __init init_rapl_pmus(void)
+static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope)
{
- int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
- int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
+ int nr_rapl_pmu;
int nr_rapl_pmu = topology_max_packages();
- int rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
+ struct rapl_pmus *rapl_pmus;
- if (rapl_pmu_is_pkg_scope()) {
- nr_rapl_pmu = topology_max_packages();
- rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
- if (!rapl_pmu_is_pkg_scope()) {
- nr_rapl_pmu *= topology_max_dies_per_package();
- rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
- }
+ if (rapl_pmu_scope == PERF_PMU_SCOPE_PKG)
+ nr_rapl_pmu = topology_max_packages();
+ else
+ nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
+ if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE)
+ nr_rapl_pmu *= topology_max_dies_per_package();
rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
@@ -62,16 +65,16 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
rapl_pmus->nr_rapl_pmu = nr_rapl_pmu;
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
rapl_pmus->pmu.attr_update = rapl_attr_update;
@@ -673,7 +675,7 @@ static int __init init_rapl_pmus(void)
@@ -674,7 +674,7 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
- init_rapl_pmu();
+ init_rapl_pmu(rapl_pmus);
return 0;
- return init_rapl_pmu();
+ return init_rapl_pmu(rapl_pmus);
}
@@ -799,8 +801,12 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
static struct rapl_model model_snb = {
@@ -798,8 +798,12 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;
@@ -84,7 +87,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
id = x86_match_cpu(rapl_model_match);
if (!id)
return -ENODEV;
@@ -816,7 +822,7 @@ static int __init rapl_pmu_init(void)
@@ -815,7 +819,7 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
@@ -93,7 +96,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
if (ret)
return ret;
@@ -829,7 +835,7 @@ static int __init rapl_pmu_init(void)
@@ -828,7 +832,7 @@ static int __init rapl_pmu_init(void)
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
@@ -102,7 +105,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
return ret;
}
module_init(rapl_pmu_init);
@@ -837,6 +843,6 @@ module_init(rapl_pmu_init);
@@ -836,6 +840,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
perf_pmu_unregister(&rapl_pmus->pmu);

View File

@@ -1,101 +0,0 @@
From f1525664ff9da3241b3556594dc0b67506ae1ddd Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Tue, 10 Sep 2024 14:25:05 +0530
Subject: perf/x86/rapl: Fix the energy-pkg event for AMD CPUs
After commit ("x86/cpu/topology: Add support for the AMD 0x80000026 leaf"),
on AMD processors that support extended CPUID leaf 0x80000026, the
topology_die_cpumask() and topology_logical_die_id() macros no longer
return the package cpumask and package id; instead, they return the CCD
(Core Complex Die) mask and id respectively. This causes the energy-pkg
event scope to change to CCD instead of package.
So, change the PMU scope for AMD and Hygon back to package.
On a 12 CCD 1 Package AMD Zen4 Genoa machine:
Before:
$ cat /sys/devices/power/cpumask
0,8,16,24,32,40,48,56,64,72,80,88.
The expected cpumask here is just "0": as this is a package-scope event,
only one CPU collects the event for all the CPUs in the package.
After:
$ cat /sys/devices/power/cpumask
0
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 35 ++++++++++++++++++++++++++++++++---
1 file changed, 32 insertions(+), 3 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -139,9 +139,32 @@ static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
+/*
+ * RAPL Package energy counter scope:
+ * 1. AMD/HYGON platforms have a per-PKG package energy counter
+ * 2. For Intel platforms
+ * 2.1. CLX-AP is multi-die and its RAPL MSRs are die-scope
+ * 2.2. Other Intel platforms are single die systems so the scope can be
+ * considered as either pkg-scope or die-scope, and we are considering
+ * them as die-scope.
+ */
+#define rapl_pmu_is_pkg_scope() \
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+
+/*
+ * Helper function to get the correct topology id according to the
+ * RAPL PMU scope.
+ */
+static inline unsigned int get_rapl_pmu_idx(int cpu)
+{
+ return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
+ topology_logical_die_id(cpu);
+}
+
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
- unsigned int rapl_pmu_idx = topology_logical_die_id(cpu);
+ unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
/*
* The unsigned check also catches the '-1' return value for non
@@ -617,7 +640,7 @@ static void __init init_rapl_pmu(void)
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
+ rapl_pmus->pmus[get_rapl_pmu_idx(cpu)] = pmu;
}
cpus_read_unlock();
@@ -626,6 +649,12 @@ static void __init init_rapl_pmu(void)
static int __init init_rapl_pmus(void)
{
int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
+ int rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
+
+ if (rapl_pmu_is_pkg_scope()) {
+ nr_rapl_pmu = topology_max_packages();
+ rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
+ }
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
@@ -641,8 +670,8 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
+ rapl_pmus->pmu.scope = rapl_pmu_scope;
rapl_pmus->pmu.module = THIS_MODULE;
- rapl_pmus->pmu.scope = PERF_PMU_SCOPE_DIE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
init_rapl_pmu();

View File

@@ -1,20 +1,22 @@
From b5c83c40540298a39f8314034b705f1236b17a9f Mon Sep 17 00:00:00 2001
From 98d0cb818ba4695b9a41ab83b7c00ec1cbdf1b35 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:47:58 +0000
Date: Fri, 15 Nov 2024 06:08:03 +0000
Subject: perf/x86/rapl: Modify the generic variable names to *_pkg*
Prep for addition of power_per_core PMU to handle core scope energy
consumption for AMD CPUs.
Prepare for the addition of RAPL core energy counter support.
Replace the generic names with *_pkg*, to differentiate between the
scopes of the two different PMUs and their variables.
Replace the generic names with *_pkg*, so that the scopes of the two
different PMUs and their variables can be differentiated later on.
No functional change.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
---
arch/x86/events/rapl.c | 118 ++++++++++++++++++++---------------------
1 file changed, 59 insertions(+), 59 deletions(-)
arch/x86/events/rapl.c | 120 ++++++++++++++++++++---------------------
1 file changed, 60 insertions(+), 60 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -41,7 +43,16 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
"pp0-core",
"package",
"dram",
@@ -126,16 +126,16 @@ enum rapl_unit_quirk {
@@ -112,7 +112,7 @@ static struct perf_pmu_events_attr event
* considered as either pkg-scope or die-scope, and we are considering
* them as die-scope.
*/
-#define rapl_pmu_is_pkg_scope() \
+#define rapl_pkg_pmu_is_pkg_scope() \
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
@@ -139,16 +139,16 @@ enum rapl_unit_quirk {
};
struct rapl_model {
@@ -63,25 +74,18 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
static struct rapl_model *rapl_model;
@@ -149,7 +149,7 @@ static struct rapl_model *rapl_model;
* considered as either pkg-scope or die-scope, and we are considering
* them as die-scope.
*/
-#define rapl_pmu_is_pkg_scope() \
+#define rapl_pkg_pmu_is_pkg_scope() \
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
@@ -159,7 +159,7 @@ static struct rapl_model *rapl_model;
*/
static inline unsigned int get_rapl_pmu_idx(int cpu)
{
@@ -163,8 +163,8 @@ static inline unsigned int get_rapl_pmu_
* (for non-existent mappings in topology map) to UINT_MAX, so
* the error check in the caller is simplified.
*/
- return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
- topology_logical_die_id(cpu);
+ return rapl_pkg_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
topology_logical_die_id(cpu);
+ topology_logical_die_id(cpu);
}
@@ -172,7 +172,7 @@ static inline u64 rapl_read_counter(stru
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -176,7 +176,7 @@ static inline u64 rapl_read_counter(stru
static inline u64 rapl_scale(u64 v, int cfg)
{
@@ -90,7 +94,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
pr_warn("Invalid domain %d, failed to scale data\n", cfg);
return v;
}
@@ -182,7 +182,7 @@ static inline u64 rapl_scale(u64 v, int
@@ -186,7 +186,7 @@ static inline u64 rapl_scale(u64 v, int
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
@@ -99,8 +103,8 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
}
static u64 rapl_event_update(struct perf_event *event)
@@ -342,7 +342,7 @@ static int rapl_pmu_event_init(struct pe
struct rapl_pmu *rapl_pmu;
@@ -347,7 +347,7 @@ static int rapl_pmu_event_init(struct pe
unsigned int rapl_pmu_idx;
/* only look at RAPL events */
- if (event->attr.type != rapl_pmus->pmu.type)
@@ -108,7 +112,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
return -ENOENT;
/* check only supported bits are set */
@@ -352,14 +352,14 @@ static int rapl_pmu_event_init(struct pe
@@ -357,14 +357,14 @@ static int rapl_pmu_event_init(struct pe
if (event->cpu < 0)
return -EINVAL;
@@ -126,7 +130,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
return -EINVAL;
/* unsupported modes and filters */
@@ -367,11 +367,11 @@ static int rapl_pmu_event_init(struct pe
@@ -372,11 +372,11 @@ static int rapl_pmu_event_init(struct pe
return -EINVAL;
rapl_pmu_idx = get_rapl_pmu_idx(event->cpu);
@@ -140,7 +144,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
if (!rapl_pmu)
return -EINVAL;
@@ -525,11 +525,11 @@ static struct perf_msr intel_rapl_spr_ms
@@ -530,11 +530,11 @@ static struct perf_msr intel_rapl_spr_ms
};
/*
@@ -155,7 +159,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
[PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 },
[PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 },
@@ -545,8 +545,8 @@ static int rapl_check_hw_unit(void)
@@ -550,8 +550,8 @@ static int rapl_check_hw_unit(void)
/* protect rdmsrl() to handle virtualization */
if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
return -1;
@@ -166,7 +170,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
switch (rapl_model->unit_quirk) {
/*
@@ -556,11 +556,11 @@ static int rapl_check_hw_unit(void)
@@ -561,11 +561,11 @@ static int rapl_check_hw_unit(void)
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
*/
case RAPL_UNIT_QUIRK_INTEL_HSW:
@@ -180,7 +184,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
break;
default:
break;
@@ -575,9 +575,9 @@ static int rapl_check_hw_unit(void)
@@ -580,9 +580,9 @@ static int rapl_check_hw_unit(void)
* if hw unit is 32, then we use 2 ms 1/200/2
*/
rapl_timer_ms = 2;
@@ -192,7 +196,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
}
return 0;
}
@@ -587,12 +587,12 @@ static void __init rapl_advertise(void)
@@ -592,12 +592,12 @@ static void __init rapl_advertise(void)
int i;
pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
@@ -209,7 +213,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
}
}
}
@@ -681,71 +681,71 @@ static int __init init_rapl_pmus(struct
@@ -678,71 +678,71 @@ static int __init init_rapl_pmus(struct
}
static struct rapl_model model_snb = {
@@ -297,7 +301,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
@@ -801,11 +801,11 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
@@ -798,11 +798,11 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_m
static int __init rapl_pmu_init(void)
{
const struct x86_cpu_id *id;
@@ -312,7 +316,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
id = x86_match_cpu(rapl_model_match);
if (!id)
@@ -813,20 +813,20 @@ static int __init rapl_pmu_init(void)
@@ -810,20 +810,20 @@ static int __init rapl_pmu_init(void)
rapl_model = (struct rapl_model *) id->driver_data;
@@ -338,7 +342,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
if (ret)
goto out;
@@ -835,14 +835,14 @@ static int __init rapl_pmu_init(void)
@@ -832,14 +832,14 @@ static int __init rapl_pmu_init(void)
out:
pr_warn("Initialization failed (%d), disabled\n", ret);

View File

@@ -1,87 +0,0 @@
From b8e1231d5f78314de8f9066baba7b1fdd5e59218 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:21:42 +0000
Subject: perf/x86/rapl: Remove the cpu_to_rapl_pmu() function
Preparation for the addition of per-core RAPL energy counter support for
AMD CPUs. After that, one CPU might be mapped to more than one rapl_pmu
(the package/die one or the per-core one). It also makes sense to use the
get_rapl_pmu_idx() helper, which is already used to index into the
rapl_pmus->pmus[] array.
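A small sketch of the open-coded lookup this change introduces (mirroring
the hunks below), with a note on why a single cpu_to_rapl_pmu() helper
would become ambiguous later in the series:

	/*
	 * Open-coded lookup: later in the series the same CPU also maps into a
	 * second, core-scope PMU, so a single cpu_to_rapl_pmu() is ambiguous.
	 */
	rapl_pmu_idx = get_rapl_pmu_idx(cpu);
	if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)	/* unsigned compare also catches -1 */
		return -EINVAL;
	pmu = rapl_pmus->pmus[rapl_pmu_idx];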
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
---
arch/x86/events/rapl.c | 29 +++++++++++++----------------
1 file changed, 13 insertions(+), 16 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -162,17 +162,6 @@ static inline unsigned int get_rapl_pmu_
topology_logical_die_id(cpu);
}
-static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
-{
- unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu);
-
- /*
- * The unsigned check also catches the '-1' return value for non
- * existent mappings in the topology map.
- */
- return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL;
-}
-
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
@@ -348,7 +337,7 @@ static void rapl_pmu_event_del(struct pe
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, ret = 0;
+ int bit, rapl_pmu_idx, ret = 0;
struct rapl_pmu *pmu;
/* only look at RAPL events */
@@ -376,8 +365,12 @@ static int rapl_pmu_event_init(struct pe
if (event->attr.sample_period) /* no sampling */
return -EINVAL;
+ rapl_pmu_idx = get_rapl_pmu_idx(event->cpu);
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
+ return -EINVAL;
+
/* must be done before validate_group */
- pmu = cpu_to_rapl_pmu(event->cpu);
+ pmu = rapl_pmus->pmus[rapl_pmu_idx];
if (!pmu)
return -EINVAL;
event->pmu_private = pmu;
@@ -623,12 +616,16 @@ static const struct attribute_group *rap
static void __init init_rapl_pmu(void)
{
struct rapl_pmu *pmu;
- int cpu;
+ int cpu, rapl_pmu_idx;
cpus_read_lock();
for_each_cpu(cpu, cpu_online_mask) {
- pmu = cpu_to_rapl_pmu(cpu);
+ rapl_pmu_idx = get_rapl_pmu_idx(cpu);
+ if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
+ continue;
+
+ pmu = rapl_pmus->pmus[rapl_pmu_idx];
if (pmu)
continue;
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
@@ -640,7 +637,7 @@ static void __init init_rapl_pmu(void)
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[get_rapl_pmu_idx(cpu)] = pmu;
+ rapl_pmus->pmus[rapl_pmu_idx] = pmu;
}
cpus_read_unlock();

View File

@@ -1,23 +1,24 @@
From dbc0343069c8f86fad0d8d9075f70f79114ef10a Mon Sep 17 00:00:00 2001
From ea8de8012d6d6ef2c24c45a56f230e3a7fcb8ce7 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:47:59 +0000
Date: Fri, 15 Nov 2024 06:08:04 +0000
Subject: perf/x86/rapl: Remove the global variable rapl_msrs
Prepare for the addition of RAPL core energy counter support.
After making the rapl_model struct global, the rapl_msrs global
variable isn't needed, so remove it.
Also, it will be cleaner when the new per-core scope PMU is added, as we
will need to maintain two rapl_msrs arrays (one for the per-core scope PMU
and one for the package scope PMU) inside the rapl_model struct.
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
---
arch/x86/events/rapl.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -137,7 +137,6 @@ static int rapl_pkg_hw_unit[NR_RAPL_PKG_
@@ -150,7 +150,6 @@ static int rapl_pkg_hw_unit[NR_RAPL_PKG_
static struct rapl_pmus *rapl_pmus_pkg;
static unsigned int rapl_pkg_cntr_mask;
static u64 rapl_timer_ms;
@@ -25,7 +26,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
static struct rapl_model *rapl_model;
/*
@@ -376,7 +375,7 @@ static int rapl_pmu_event_init(struct pe
@@ -381,7 +380,7 @@ static int rapl_pmu_event_init(struct pe
return -EINVAL;
event->pmu_private = rapl_pmu;
@@ -34,7 +35,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
event->hw.config = cfg;
event->hw.idx = bit;
@@ -813,9 +812,7 @@ static int __init rapl_pmu_init(void)
@@ -810,9 +809,7 @@ static int __init rapl_pmu_init(void)
rapl_model = (struct rapl_model *) id->driver_data;

View File

@@ -1,24 +1,26 @@
From d6a5a28382558b896767a78db795d421015831a7 Mon Sep 17 00:00:00 2001
From 8cbd0e18e5b36bbfdb5ca8931a7668c7963be8e5 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:48:00 +0000
Date: Fri, 15 Nov 2024 06:08:05 +0000
Subject: perf/x86/rapl: Move the cntr_mask to rapl_pmus struct
Preparation for the addition of per-core RAPL energy counter for AMD
CPUs.
Prepare for the addition of RAPL core energy counter support.
Moving cntr_mask to the rapl_pmus struct instead of adding a new global
cntr_mask for the per-core RAPL energy counter will ensure that the
"per_core_cntr_mask" is only created if needed (i.e. in case of AMD
CPUs).
Move cntr_mask to rapl_pmus struct instead of adding a new global
cntr_mask for the new RAPL power_core PMU. This will also ensure that
the second "core_cntr_mask" is only created if needed (i.e. in case of
AMD CPUs).
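For reference, a sketch of the resulting layout (matching the struct shown
in the hunk below), with the counter mask now kept per PMU instance:

	struct rapl_pmus {
		struct pmu pmu;
		unsigned int nr_rapl_pmu;
		unsigned int cntr_mask;		/* counters available to this PMU */
		struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu);
	};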
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
---
arch/x86/events/rapl.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -116,6 +116,7 @@ struct rapl_pmu {
@@ -129,6 +129,7 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
unsigned int nr_rapl_pmu;
@@ -26,7 +28,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu);
};
@@ -135,7 +136,6 @@ struct rapl_model {
@@ -148,7 +149,6 @@ struct rapl_model {
/* 1/2^hw_unit Joule */
static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus_pkg;
@@ -34,7 +36,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
static u64 rapl_timer_ms;
static struct rapl_model *rapl_model;
@@ -358,7 +358,7 @@ static int rapl_pmu_event_init(struct pe
@@ -363,7 +363,7 @@ static int rapl_pmu_event_init(struct pe
bit = cfg - 1;
/* check event supported */
@@ -43,7 +45,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
return -EINVAL;
/* unsupported modes and filters */
@@ -586,10 +586,10 @@ static void __init rapl_advertise(void)
@@ -591,10 +591,10 @@ static void __init rapl_advertise(void)
int i;
pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
@@ -56,7 +58,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
pr_info("hw unit of domain %s 2^-%d Joules\n",
rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]);
}
@@ -812,9 +812,6 @@ static int __init rapl_pmu_init(void)
@@ -809,9 +809,6 @@ static int __init rapl_pmu_init(void)
rapl_model = (struct rapl_model *) id->driver_data;
@@ -66,7 +68,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
ret = rapl_check_hw_unit();
if (ret)
return ret;
@@ -823,6 +820,10 @@ static int __init rapl_pmu_init(void)
@@ -820,6 +817,10 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;

View File

@@ -1,39 +1,43 @@
From 3cb480ec2950f4c6351c602552fc4f9a8e524b89 Mon Sep 17 00:00:00 2001
From 27bdf22a4c7815831d38acd1cb08e5aa6ce95ea0 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Fri, 13 Sep 2024 15:48:01 +0000
Subject: perf/x86/rapl: Add per-core energy counter support for AMD CPUs
Date: Fri, 15 Nov 2024 06:08:06 +0000
Subject: perf/x86/rapl: Add core energy counter support for AMD CPUs
Add a new "power_per_core" PMU and "energy-per-core" event for
monitoring energy consumption by each core. The existing energy-cores
event aggregates the energy consumption at the package level.
This new event aligns with AMD's per_core energy counters.
Add a new "power_core" PMU and "energy-core" event for monitoring
energy consumption by each individual core. The existing energy-cores
event aggregates the energy consumption of CPU cores at the package level.
This new event aligns with AMD's per-core energy counters.
Tested the package level and core level PMU counters with workloads
pinned to different CPUs.
Results with workload pinned to CPU 1 in core 1 on a AMD Zen4 Genoa
Results with workload pinned to CPU 4 in core 4 on an AMD Zen4 Genoa
machine:
$ perf stat -a --per-core -e power_per_core/energy-per-core/ sleep 1
$ sudo perf stat --per-core -e power_core/energy-core/ -- taskset -c 4 stress-ng --matrix 1 --timeout 5s
stress-ng: info: [21250] setting to a 5 second run per stressor
stress-ng: info: [21250] dispatching hogs: 1 matrix
stress-ng: info: [21250] successful run completed in 5.00s
Performance counter stats for 'system wide':
S0-D0-C0 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C1 1 5.72 Joules power_per_core/energy-per-core/
S0-D0-C2 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C3 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C4 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C5 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C6 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C7 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C8 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C9 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C10 1 0.02 Joules power_per_core/energy-per-core/
S0-D0-C0 1 0.00 Joules power_core/energy-core/
S0-D0-C1 1 0.00 Joules power_core/energy-core/
S0-D0-C2 1 0.00 Joules power_core/energy-core/
S0-D0-C3 1 0.00 Joules power_core/energy-core/
S0-D0-C4 1 8.43 Joules power_core/energy-core/
S0-D0-C5 1 0.00 Joules power_core/energy-core/
S0-D0-C6 1 0.00 Joules power_core/energy-core/
S0-D0-C7 1 0.00 Joules power_core/energy-core/
S0-D1-C8 1 0.00 Joules power_core/energy-core/
S0-D1-C9 1 0.00 Joules power_core/energy-core/
S0-D1-C10 1 0.00 Joules power_core/energy-core/
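For reference, a minimal user-space sketch (not part of the patch) that
reads the new event through perf_event_open(); it assumes the power_core
PMU registered below and the event=0x01 / 2^-32 Joule encoding from this
patch, and typically needs root:

	/* gcc -o energy_core energy_core.c */
	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdint.h>
	#include <stdio.h>

	static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
				    int cpu, int group_fd, unsigned long flags)
	{
		return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	}

	int main(void)
	{
		struct perf_event_attr attr = { 0 };
		unsigned int type;
		uint64_t count;
		FILE *f;
		int fd;

		/* dynamic PMU type id of the power_core PMU */
		f = fopen("/sys/bus/event_source/devices/power_core/type", "r");
		if (!f || fscanf(f, "%u", &type) != 1)
			return 1;
		fclose(f);

		attr.type = type;
		attr.size = sizeof(attr);
		attr.config = 0x01;				/* energy-core */

		fd = perf_event_open(&attr, -1, 4, -1, 0);	/* CPU 4, system-wide */
		if (fd < 0)
			return 1;

		sleep(1);
		if (read(fd, &count, sizeof(count)) != sizeof(count))
			return 1;

		/* raw counter; multiply by 2^-32 to get Joules */
		printf("energy-core on CPU4: %.6f J\n",
		       count * 2.3283064365386962890625e-10);
		return 0;
	}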
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
---
arch/x86/events/rapl.c | 178 +++++++++++++++++++++++++++++++++--------
1 file changed, 143 insertions(+), 35 deletions(-)
arch/x86/events/rapl.c | 185 +++++++++++++++++++++++++++++++++--------
1 file changed, 152 insertions(+), 33 deletions(-)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -41,8 +45,8 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
* event: rapl_energy_psys
* perf code: 0x5
*
+ * per_core counter: consumption of a single physical core
+ * event: rapl_energy_per_core (power_per_core PMU)
+ * core counter: consumption of a single physical core
+ * event: rapl_energy_core (power_core PMU)
+ * perf code: 0x1
+ *
* We manage those counters as free running (read-only). They may be
@@ -52,7 +56,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
};
+#define PERF_RAPL_PER_CORE 0 /* per-core */
+#define PERF_RAPL_CORE 0 /* single core */
+#define PERF_RAPL_CORE_EVENTS_MAX 1
+#define NR_RAPL_CORE_DOMAINS PERF_RAPL_CORE_EVENTS_MAX
+
@@ -63,12 +67,12 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
"psys",
};
+static const char *const rapl_core_domain_name __initconst = "per-core";
+static const char *const rapl_core_domain_name __initconst = "core";
+
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -128,14 +138,18 @@ enum rapl_unit_quirk {
@@ -141,14 +151,18 @@ enum rapl_unit_quirk {
struct rapl_model {
struct perf_msr *rapl_pkg_msrs;
@@ -87,25 +91,35 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
static u64 rapl_timer_ms;
static struct rapl_model *rapl_model;
@@ -156,10 +170,14 @@ static struct rapl_model *rapl_model;
@@ -156,14 +170,23 @@ static struct rapl_model *rapl_model;
* Helper function to get the correct topology id according to the
* RAPL PMU scope.
*/
-static inline unsigned int get_rapl_pmu_idx(int cpu)
-{ /*
+static inline unsigned int get_rapl_pmu_idx(int cpu, int scope)
{
+{
+ /*
* Returns unsigned int, which converts the '-1' return value
* (for non-existent mappings in topology map) to UINT_MAX, so
* the error check in the caller is simplified.
*/
- return rapl_pkg_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
- topology_logical_die_id(cpu);
+ if (scope == PERF_PMU_SCOPE_PKG)
- topology_logical_die_id(cpu);
+ switch (scope) {
+ case PERF_PMU_SCOPE_PKG:
+ return topology_logical_package_id(cpu);
+ else if (scope == PERF_PMU_SCOPE_DIE)
+ case PERF_PMU_SCOPE_DIE:
+ return topology_logical_die_id(cpu);
+ else
+ case PERF_PMU_SCOPE_CORE:
+ return topology_logical_core_id(cpu);
+ default:
+ return -EINVAL;
+ }
}
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -169,19 +187,20 @@ static inline u64 rapl_read_counter(stru
@@ -173,19 +196,20 @@ static inline u64 rapl_read_counter(stru
return raw;
}
@@ -132,7 +146,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
}
static u64 rapl_event_update(struct perf_event *event)
@@ -208,7 +227,7 @@ static u64 rapl_event_update(struct perf
@@ -212,7 +236,7 @@ static u64 rapl_event_update(struct perf
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
@@ -141,13 +155,14 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
local64_add(sdelta, &event->count);
@@ -337,12 +356,13 @@ static void rapl_pmu_event_del(struct pe
@@ -341,13 +365,14 @@ static void rapl_pmu_event_del(struct pe
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
- int bit, rapl_pmu_idx, ret = 0;
+ int bit, rapl_pmus_scope, rapl_pmu_idx, ret = 0;
- int bit, ret = 0;
+ int bit, rapl_pmus_scope, ret = 0;
struct rapl_pmu *rapl_pmu;
unsigned int rapl_pmu_idx;
+ struct rapl_pmus *rapl_pmus;
- /* only look at RAPL events */
@@ -159,7 +174,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
/* check only supported bits are set */
if (event->attr.config & ~RAPL_EVENT_MASK)
@@ -351,31 +371,49 @@ static int rapl_pmu_event_init(struct pe
@@ -356,31 +381,49 @@ static int rapl_pmu_event_init(struct pe
if (event->cpu < 0)
return -EINVAL;
@@ -186,7 +201,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
+ bit = cfg - 1;
+ event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr;
+ } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) {
+ /* only look at RAPL per-core events */
+ /* only look at RAPL core events */
+ if (event->attr.type != rapl_pmus_core->pmu.type)
+ return -ENOENT;
+
@@ -222,34 +237,34 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
event->hw.config = cfg;
event->hw.idx = bit;
@@ -392,12 +430,14 @@ RAPL_EVENT_ATTR_STR(energy-pkg , rapl
@@ -397,12 +440,14 @@ RAPL_EVENT_ATTR_STR(energy-pkg , rapl
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
+RAPL_EVENT_ATTR_STR(energy-per-core, rapl_per_core, "event=0x01");
+RAPL_EVENT_ATTR_STR(energy-core, rapl_core, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-per-core.unit, rapl_per_core_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-core.unit, rapl_core_unit, "Joules");
/*
* we compute in 0.23 nJ increments regardless of MSR
@@ -407,6 +447,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale,
@@ -412,6 +457,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale,
RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-per-core.scale, rapl_per_core_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-core.scale, rapl_core_scale, "2.3283064365386962890625e-10");
/*
* There are no default events, but we need to create
@@ -439,6 +480,12 @@ static const struct attribute_group *rap
@@ -444,6 +490,12 @@ static const struct attribute_group *rap
NULL,
};
+static const struct attribute_group *rapl_per_core_attr_groups[] = {
+static const struct attribute_group *rapl_core_attr_groups[] = {
+ &rapl_pmu_format_group,
+ &rapl_pmu_events_group,
+ NULL,
@@ -258,38 +273,38 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
static struct attribute *rapl_events_cores[] = {
EVENT_PTR(rapl_cores),
EVENT_PTR(rapl_cores_unit),
@@ -499,6 +546,18 @@ static struct attribute_group rapl_event
@@ -504,6 +556,18 @@ static struct attribute_group rapl_event
.attrs = rapl_events_psys,
};
+static struct attribute *rapl_events_per_core[] = {
+ EVENT_PTR(rapl_per_core),
+ EVENT_PTR(rapl_per_core_unit),
+ EVENT_PTR(rapl_per_core_scale),
+static struct attribute *rapl_events_core[] = {
+ EVENT_PTR(rapl_core),
+ EVENT_PTR(rapl_core_unit),
+ EVENT_PTR(rapl_core_scale),
+ NULL,
+};
+
+static struct attribute_group rapl_events_per_core_group = {
+static struct attribute_group rapl_events_core_group = {
+ .name = "events",
+ .attrs = rapl_events_per_core,
+ .attrs = rapl_events_core,
+};
+
static bool test_msr(int idx, void *data)
{
return test_bit(idx, (unsigned long *) data);
@@ -536,6 +595,11 @@ static struct perf_msr amd_rapl_pkg_msrs
@@ -541,6 +605,11 @@ static struct perf_msr amd_rapl_pkg_msrs
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
+static struct perf_msr amd_rapl_core_msrs[] = {
+ [PERF_RAPL_PER_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_per_core_group,
+ [PERF_RAPL_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_core_group,
+ test_msr, false, RAPL_MSR_MASK },
+};
+
static int rapl_check_hw_unit(void)
{
u64 msr_rapl_power_unit_bits;
@@ -547,6 +611,8 @@ static int rapl_check_hw_unit(void)
@@ -552,6 +621,8 @@ static int rapl_check_hw_unit(void)
for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
@@ -298,7 +313,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
switch (rapl_model->unit_quirk) {
/*
* DRAM domain on HSW server and KNL has fixed energy unit which can be
@@ -565,7 +631,6 @@ static int rapl_check_hw_unit(void)
@@ -570,7 +641,6 @@ static int rapl_check_hw_unit(void)
break;
}
@@ -306,7 +321,7 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
/*
* Calculate the timer rate:
* Use reference of 200W for scaling the timeout to avoid counter
@@ -584,9 +649,13 @@ static int rapl_check_hw_unit(void)
@@ -589,9 +659,13 @@ static int rapl_check_hw_unit(void)
static void __init rapl_advertise(void)
{
int i;
@@ -321,42 +336,30 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) {
if (rapl_pmus_pkg->cntr_mask & (1 << i)) {
@@ -594,6 +663,10 @@ static void __init rapl_advertise(void)
@@ -599,6 +673,10 @@ static void __init rapl_advertise(void)
rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]);
}
}
+
+ if (rapl_pmus_core && (rapl_pmus_core->cntr_mask & (1 << PERF_RAPL_PER_CORE)))
+ if (rapl_pmus_core && (rapl_pmus_core->cntr_mask & (1 << PERF_RAPL_CORE)))
+ pr_info("hw unit of domain %s 2^-%d Joules\n",
+ rapl_core_domain_name, rapl_core_hw_unit);
}
static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus)
@@ -614,6 +687,10 @@ static const struct attribute_group *rap
@@ -619,6 +697,10 @@ static const struct attribute_group *rap
NULL,
};
+static const struct attribute_group *rapl_per_core_attr_update[] = {
+ &rapl_events_per_core_group,
+static const struct attribute_group *rapl_core_attr_update[] = {
+ &rapl_events_core_group,
+};
+
static void __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
{
struct rapl_pmu *rapl_pmu;
@@ -622,10 +699,9 @@ static void __init init_rapl_pmu(struct
cpus_read_lock();
for_each_cpu(cpu, cpu_online_mask) {
- rapl_pmu_idx = get_rapl_pmu_idx(cpu);
+ rapl_pmu_idx = get_rapl_pmu_idx(cpu, rapl_pmus->pmu.scope);
if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu)
continue;
-
rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx];
if (rapl_pmu)
continue;
@@ -644,15 +720,19 @@ static void __init init_rapl_pmu(struct
cpus_read_unlock();
@@ -645,13 +727,22 @@ free:
return -ENOMEM;
}
-static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope)
@@ -364,31 +367,33 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
+ const struct attribute_group **rapl_attr_groups,
+ const struct attribute_group **rapl_attr_update)
{
int nr_rapl_pmu;
int nr_rapl_pmu = topology_max_packages();
struct rapl_pmus *rapl_pmus;
if (rapl_pmu_scope == PERF_PMU_SCOPE_PKG)
nr_rapl_pmu = topology_max_packages();
- else
+ else if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE)
nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package();
+ else
+ nr_rapl_pmu = topology_max_packages() * topology_num_cores_per_package();
+ /*
+ * rapl_pmu_scope must be either PKG, DIE or CORE
+ */
if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE)
nr_rapl_pmu *= topology_max_dies_per_package();
+ else if (rapl_pmu_scope == PERF_PMU_SCOPE_CORE)
+ nr_rapl_pmu *= topology_num_cores_per_package();
+ else if (rapl_pmu_scope != PERF_PMU_SCOPE_PKG)
+ return -EINVAL;
rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
@@ -743,8 +823,10 @@ static struct rapl_model model_spr = {
@@ -740,8 +831,10 @@ static struct rapl_model model_spr = {
static struct rapl_model model_amd_hygon = {
.pkg_events = BIT(PERF_RAPL_PKG),
+ .core_events = BIT(PERF_RAPL_PER_CORE),
+ .core_events = BIT(PERF_RAPL_CORE),
.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
.rapl_pkg_msrs = amd_rapl_pkg_msrs,
+ .rapl_core_msrs = amd_rapl_core_msrs,
};
static const struct x86_cpu_id rapl_model_match[] __initconst = {
@@ -816,7 +898,8 @@ static int __init rapl_pmu_init(void)
@@ -813,7 +906,8 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
@@ -398,35 +403,35 @@ Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
if (ret)
return ret;
@@ -828,6 +911,27 @@ static int __init rapl_pmu_init(void)
@@ -825,6 +919,27 @@ static int __init rapl_pmu_init(void)
if (ret)
goto out;
+ if (rapl_model->core_events) {
+ ret = init_rapl_pmus(&rapl_pmus_core, PERF_PMU_SCOPE_CORE,
+ rapl_per_core_attr_groups,
+ rapl_per_core_attr_update);
+ rapl_core_attr_groups,
+ rapl_core_attr_update);
+ if (ret) {
+ pr_warn("Per-core PMU initialization failed (%d)\n", ret);
+ goto per_core_init_failed;
+ pr_warn("power-core PMU initialization failed (%d)\n", ret);
+ goto core_init_failed;
+ }
+
+ rapl_pmus_core->cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs,
+ PERF_RAPL_CORE_EVENTS_MAX, false,
+ (void *) &rapl_model->core_events);
+
+ ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_per_core", -1);
+ ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_core", -1);
+ if (ret) {
+ pr_warn("Per-core PMU registration failed (%d)\n", ret);
+ pr_warn("power-core PMU registration failed (%d)\n", ret);
+ cleanup_rapl_pmus(rapl_pmus_core);
+ }
+ }
+
+per_core_init_failed:
+core_init_failed:
rapl_advertise();
return 0;
@@ -840,6 +944,10 @@ module_init(rapl_pmu_init);
@@ -837,6 +952,10 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{

View File

@@ -0,0 +1,21 @@
From c66779234fa7ba71cefa1c4eb283db53a3cb5303 Mon Sep 17 00:00:00 2001
From: Oleksandr Natalenko <oleksandr@natalenko.name>
Date: Sat, 23 Nov 2024 12:45:28 +0100
Subject: amd-rapl-6.12: fix clang-built kernel not booting
Link: https://lore.kernel.org/lkml/7eaf557d-7e85-4fd3-abee-f84ac01d92c1@amd.com/
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
---
arch/x86/events/rapl.c | 1 +
1 file changed, 1 insertion(+)
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -699,6 +699,7 @@ static const struct attribute_group *rap
static const struct attribute_group *rapl_core_attr_update[] = {
&rapl_events_core_group,
+ NULL,
};
static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus)
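For context: the pmu's attr_update group list is NULL-terminated and is
walked until the sentinel when the PMU registers its sysfs groups, so the
missing terminator let that walk run past the end of the array, which is
presumably why only the clang-built kernel happened to crash at boot. The
corrected array, as in the hunk above:

	static const struct attribute_group *rapl_core_attr_update[] = {
		&rapl_events_core_group,
		NULL,	/* required terminator for the group list */
	};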