summaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-21 10:52:03 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-21 10:52:03 -0800
commit6c4aa896ebee5edf2b35a9d071e5a468797f96d8 (patch)
treebe4307f37214a440755fb5791ca13a1da0499049 /arch/x86/include
parenta6640c8c2fc029f015c87672585931c6106971c1 (diff)
parentb709eb872e19a19607bbb6d2975bc264d59735cf (diff)
Merge tag 'perf-core-2025-01-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar: "Seqlock optimizations that arose in a perf context and were merged into the perf tree: - seqlock: Add raw_seqcount_try_begin (Suren Baghdasaryan) - mm: Convert mm_lock_seq to a proper seqcount (Suren Baghdasaryan) - mm: Introduce mmap_lock_speculate_{try_begin|retry} (Suren Baghdasaryan) - mm/gup: Use raw_seqcount_try_begin() (Peter Zijlstra) Core perf enhancements: - Reduce 'struct page' footprint of perf by mapping pages in advance (Lorenzo Stoakes) - Save raw sample data conditionally based on sample type (Yabin Cui) - Reduce sampling overhead by checking sample_type in perf_sample_save_callchain() and perf_sample_save_brstack() (Yabin Cui) - Export perf_exclude_event() (Namhyung Kim) Uprobes scalability enhancements: (Andrii Nakryiko) - Simplify find_active_uprobe_rcu() VMA checks - Add speculative lockless VMA-to-inode-to-uprobe resolution - Simplify session consumer tracking - Decouple return_instance list traversal and freeing - Ensure return_instance is detached from the list before freeing - Reuse return_instances between multiple uretprobes within task - Guard against kmemdup() failing in dup_return_instance() AMD core PMU driver enhancements: - Relax privilege filter restriction on AMD IBS (Namhyung Kim) AMD RAPL energy counters support: (Dhananjay Ugwekar) - Introduce topology_logical_core_id() (K Prateek Nayak) - Remove the unused get_rapl_pmu_cpumask() function - Remove the cpu_to_rapl_pmu() function - Rename rapl_pmu variables - Make rapl_model struct global - Add arguments to the init and cleanup functions - Modify the generic variable names to *_pkg* - Remove the global variable rapl_msrs - Move the cntr_mask to rapl_pmus struct - Add core energy counter support for AMD CPUs Intel core PMU driver enhancements: - Support RDPMC 'metrics clear mode' feature (Kan Liang) - Clarify adaptive PEBS processing (Kan Liang) - Factor out functions for PEBS records processing (Kan Liang) - Simplify the PEBS records processing for adaptive PEBS (Kan Liang) Intel uncore driver enhancements: (Kan Liang) - Convert buggy pmu->func_id use to pmu->registered - Support more units on Granite Rapids" * tag 'perf-core-2025-01-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits) perf: map pages in advance perf/x86/intel/uncore: Support more units on Granite Rapids perf/x86/intel/uncore: Clean up func_id perf/x86/intel: Support RDPMC metrics clear mode uprobes: Guard against kmemdup() failing in dup_return_instance() perf/x86: Relax privilege filter restriction on AMD IBS perf/core: Export perf_exclude_event() uprobes: Reuse return_instances between multiple uretprobes within task uprobes: Ensure return_instance is detached from the list before freeing uprobes: Decouple return_instance list traversal and freeing uprobes: Simplify session consumer tracking uprobes: add speculative lockless VMA-to-inode-to-uprobe resolution uprobes: simplify find_active_uprobe_rcu() VMA checks mm: introduce mmap_lock_speculate_{try_begin|retry} mm: convert mm_lock_seq to a proper seqcount mm/gup: Use raw_seqcount_try_begin() seqlock: add raw_seqcount_try_begin perf/x86/rapl: Add core energy counter support for AMD CPUs perf/x86/rapl: Move the cntr_mask to rapl_pmus struct perf/x86/rapl: Remove the global variable rapl_msrs ...
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/perf_event.h20
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/topology.h1
3 files changed, 20 insertions, 2 deletions
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index d95f902acc52..1ac79f361645 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -41,6 +41,7 @@
#define INTEL_FIXED_0_USER (1ULL << 1)
#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)
+#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2)
#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
@@ -372,6 +373,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND
#define INTEL_TD_METRIC_NUM 8
+#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0
+#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT)
+
static inline bool is_metric_idx(int idx)
{
return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
@@ -422,7 +426,9 @@ static inline bool is_topdown_idx(int idx)
*/
struct pebs_basic {
- u64 format_size;
+ u64 format_group:32,
+ retire_latency:16,
+ format_size:16;
u64 ip;
u64 applicable_counters;
u64 tsc;
@@ -431,7 +437,17 @@ struct pebs_basic {
struct pebs_meminfo {
u64 address;
u64 aux;
- u64 latency;
+ union {
+ /* pre Alder Lake */
+ u64 mem_latency;
+ /* Alder Lake and later */
+ struct {
+ u64 instr_latency:16;
+ u64 pad2:16;
+ u64 cache_latency:16;
+ u64 pad3:16;
+ };
+ };
u64 tsx_tuning;
};
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 20e6009381ed..c0cd10182e90 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -98,6 +98,7 @@ struct cpuinfo_topology {
// Logical ID mappings
u32 logical_pkg_id;
u32 logical_die_id;
+ u32 logical_core_id;
// AMD Node ID and Nodes per Package info
u32 amd_node_id;
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index fd41103ad342..3973cb9bb2e6 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -143,6 +143,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id)
#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id)
+#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id)
#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id)
#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
#define topology_ppin(cpu) (cpu_data(cpu).ppin)