From 3521ba1cc351e80488c3f85748c92c3853b75818 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 17 Apr 2016 15:03:01 -0700 Subject: powercap, perf/x86/intel/rapl: Add PSys support Skylake processors support a new set of RAPL registers for controlling the entire SoC instead of just the CPU package. This is useful for thermal and power control when the source of power/thermal is not just the CPU/GPU. This change adds a new platform domain (AKA PSys) to the current power capping Intel RAPL driver. PSys also supports PL1 (long term) and PL2 (short term) control like the package domain. It also uses the same MSRs for energy and time units as the package domain. Unlike the package domain, PSys support requires more than just processor level implementation. The other parts in the system need additional implementation, which OEMs need to support. So not all Skylake systems will support PSys. Signed-off-by: Srinivas Pandruvada Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rafael J. Wysocki Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: bp@alien8.de Cc: hpa@zytor.com Cc: jacob.jun.pan@linux.intel.com Cc: rjw@rjwysocki.net Link: http://lkml.kernel.org/r/1460930581-29748-3-git-send-email-srinivas.pandruvada@linux.intel.com Signed-off-by: Ingo Molnar --- drivers/powercap/intel_rapl.c | 69 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'drivers') diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 8fad0a7044d3..f2201d42a9cd 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -34,6 +34,9 @@ #include #include +/* Local defines */ +#define MSR_PLATFORM_POWER_LIMIT 0x0000065C + /* bitmasks for RAPL MSRs, used by primitive access functions */ #define ENERGY_STATUS_MASK 0xffffffff @@ -86,6 +89,7 @@ enum rapl_domain_type { RAPL_DOMAIN_PP0, /* core power plane */ RAPL_DOMAIN_PP1, /* graphics uncore */ 
RAPL_DOMAIN_DRAM,/* DRAM control_type */ + RAPL_DOMAIN_PLATFORM, /* PSys control_type */ RAPL_DOMAIN_MAX, }; @@ -251,9 +255,11 @@ static const char * const rapl_domain_names[] = { "core", "uncore", "dram", + "psys", }; static struct powercap_control_type *control_type; /* PowerCap Controller */ +static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */ /* caller to ensure CPU hotplug lock is held */ static struct rapl_package *find_package_by_id(int id) @@ -409,6 +415,14 @@ static const struct powercap_zone_ops zone_ops[] = { .set_enable = set_domain_enable, .get_enable = get_domain_enable, }, + /* RAPL_DOMAIN_PLATFORM */ + { + .get_energy_uj = get_energy_counter, + .get_max_energy_range_uj = get_max_energy_counter, + .release = release_zone, + .set_enable = set_domain_enable, + .get_enable = get_domain_enable, + }, }; static int set_power_limit(struct powercap_zone *power_zone, int id, @@ -1160,6 +1174,13 @@ static int rapl_unregister_powercap(void) powercap_unregister_zone(control_type, &rd_package->power_zone); } + + if (platform_rapl_domain) { + powercap_unregister_zone(control_type, + &platform_rapl_domain->power_zone); + kfree(platform_rapl_domain); + } + powercap_unregister_control_type(control_type); return 0; @@ -1239,6 +1260,47 @@ err_cleanup: return ret; } +static int rapl_register_psys(void) +{ + struct rapl_domain *rd; + struct powercap_zone *power_zone; + u64 val; + + if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_ENERGY_STATUS, &val) || !val) + return -ENODEV; + + if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_POWER_LIMIT, &val) || !val) + return -ENODEV; + + rd = kzalloc(sizeof(*rd), GFP_KERNEL); + if (!rd) + return -ENOMEM; + + rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM]; + rd->id = RAPL_DOMAIN_PLATFORM; + rd->msrs[0] = MSR_PLATFORM_POWER_LIMIT; + rd->msrs[1] = MSR_PLATFORM_ENERGY_STATUS; + rd->rpl[0].prim_id = PL1_ENABLE; + rd->rpl[0].name = pl1_name; + rd->rpl[1].prim_id = PL2_ENABLE; + rd->rpl[1].name = pl2_name; + rd->rp = 
find_package_by_id(0); + + power_zone = powercap_register_zone(&rd->power_zone, control_type, + "psys", NULL, + &zone_ops[RAPL_DOMAIN_PLATFORM], + 2, &constraint_ops); + + if (IS_ERR(power_zone)) { + kfree(rd); + return PTR_ERR(power_zone); + } + + platform_rapl_domain = rd; + + return 0; +} + static int rapl_register_powercap(void) { struct rapl_domain *rd; @@ -1255,6 +1317,10 @@ static int rapl_register_powercap(void) list_for_each_entry(rp, &rapl_packages, plist) if (rapl_package_register_powercap(rp)) goto err_cleanup_package; + + /* Don't bail out if PSys is not supported */ + rapl_register_psys(); + return ret; err_cleanup_package: @@ -1289,6 +1355,9 @@ static int rapl_check_domain(int cpu, int domain) case RAPL_DOMAIN_DRAM: msr = MSR_DRAM_ENERGY_STATUS; break; + case RAPL_DOMAIN_PLATFORM: + /* PSYS(PLATFORM) is not a CPU domain, so avoid printing an error */ + return -EINVAL; default: pr_err("invalid domain id %d\n", domain); return -EINVAL; -- cgit v1.2.3 From 5101ef20f0ef1de79091a1fdb6b1a7f07565545a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 Apr 2016 11:33:46 +0100 Subject: perf/arm: Special-case heterogeneous CPUs Commit: 26657848502b7847 ("perf/core: Verify we have a single perf_hw_context PMU") forcefully prevents multiple PMUs from sharing perf_hw_context, as this generally doesn't make sense. It is a common bug for uncore PMUs to use perf_hw_context rather than perf_invalid_context, which this detects. However, systems exist with heterogeneous CPUs (and hence heterogeneous HW PMUs), for which sharing perf_hw_context is necessary, and possible in some limited cases. 
To make this work we have to perform some gymnastics, as we did in these commits: 66eb579e66ecfea5 ("perf: allow for PMU-specific event filtering") c904e32a69b7c779 ("arm: perf: filter unschedulable events") To allow those systems to work, we must allow PMUs for heterogeneous CPUs to share perf_hw_context, though we must still disallow sharing otherwise to detect the common misuse of perf_hw_context. This patch adds a new PERF_PMU_CAP_HETEROGENEOUS_CPUS for this, updates the core logic to account for this, and makes use of it in the arm_pmu code that is used for systems with heterogeneous CPUs. Comments are added to make the rationale clear and hopefully avoid accidental abuse. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Catalin Marinas Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20160426103346.GA20836@leverpostej Signed-off-by: Ingo Molnar --- drivers/perf/arm_pmu.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index f70090897fdf..f2d01d4d9364 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -847,6 +847,14 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu) if (!platform_get_irq(cpu_pmu->plat_device, 0)) cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + /* + * This is a CPU PMU potentially in a heterogeneous configuration (e.g. + * big.LITTLE). This is not an uncore PMU, and we have taken ctx + * sharing into account (e.g. with our pmu::filter_match callback and + * pmu::event_init group validation). + */ + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS; + return 0; out_unregister: -- cgit v1.2.3