| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-01 20:42:01 -0800 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-01 20:42:01 -0800 |
| commit | 6c26fbe8c9d3e932dce6afe2505b19b4b261cae9 (patch) | |
| tree | 81cc40ecd2cde95b1b37937cf270cc0fa3832c43 /kernel/unwind/deferred.c | |
| parent | 63e6995005be8ceb8a1d56a18df1a1a40c28356d (diff) | |
| parent | 9929dffce5ed7e2988e0274f4db98035508b16d9 (diff) | |
Merge tag 'perf-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar:
"Callchain support:
- Add support for deferred user-space stack unwinding for perf,
enabled on x86. (Peter Zijlstra, Steven Rostedt)
- unwind_user/x86: Enable frame pointer unwinding on x86 (Josh
Poimboeuf)
x86 PMU support and infrastructure:
- x86/insn: Simplify for_each_insn_prefix() (Peter Zijlstra)
- x86/insn,uprobes,alternative: Unify insn_is_nop() (Peter Zijlstra)
Intel PMU driver:
- Large series to prepare for and implement architectural PEBS
support for Intel platforms such as Clearwater Forest (CWF) and
Panther Lake (PTL). (Dapeng Mi, Kan Liang)
- Check dynamic constraints (Kan Liang)
- Optimize PEBS extended config (Peter Zijlstra)
- cstates:
- Remove PC3 support from LunarLake (Zhang Rui)
- Add Pantherlake support (Zhang Rui)
- Clearwater Forest support (Zide Chen)
AMD PMU driver:
- x86/amd: Check event before enable to avoid GPF (George Kennedy)
Fixes and cleanups:
- task_work: Fix NMI race condition (Peter Zijlstra)
- perf/x86: Fix NULL event access and potential PEBS record loss
(Dapeng Mi)
- Misc other fixes and cleanups (Dapeng Mi, Ingo Molnar, Peter
Zijlstra)"
* tag 'perf-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
perf/x86/intel: Fix and clean up intel_pmu_drain_arch_pebs() type use
perf/x86/intel: Optimize PEBS extended config
perf/x86/intel: Check PEBS dyn_constraints
perf/x86/intel: Add a check for dynamic constraints
perf/x86/intel: Add counter group support for arch-PEBS
perf/x86/intel: Setup PEBS data configuration and enable legacy groups
perf/x86/intel: Update dyn_constraint base on PEBS event precise level
perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR
perf/x86/intel: Process arch-PEBS records or record fragments
perf/x86/intel/ds: Factor out PEBS group processing code to functions
perf/x86/intel/ds: Factor out PEBS record processing code to functions
perf/x86/intel: Initialize architectural PEBS
perf/x86/intel: Correct large PEBS flag check
perf/x86/intel: Replace x86_pmu.drain_pebs calling with static call
perf/x86: Fix NULL event access and potential PEBS record loss
perf/x86: Remove redundant is_x86_event() prototype
entry,unwind/deferred: Fix unwind_reset_info() placement
unwind_user/x86: Fix arch=um build
perf: Support deferred user unwind
unwind_user/x86: Teach FP unwind about start of function
...
Diffstat (limited to 'kernel/unwind/deferred.c')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | kernel/unwind/deferred.c | 44 |

1 file changed, 24 insertions, 20 deletions
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index dc6040aae3ee..a88fb481c4a3 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -53,7 +53,7 @@ DEFINE_STATIC_SRCU(unwind_srcu);
 
 static inline bool unwind_pending(struct unwind_task_info *info)
 {
-        return test_bit(UNWIND_PENDING_BIT, &info->unwind_mask);
+        return atomic_long_read(&info->unwind_mask) & UNWIND_PENDING;
 }
 
 /*
@@ -79,6 +79,8 @@ static u64 get_cookie(struct unwind_task_info *info)
 {
         u32 cnt = 1;
 
+        lockdep_assert_irqs_disabled();
+
         if (info->id.cpu)
                 return info->id.id;
 
@@ -126,23 +128,20 @@ int unwind_user_faultable(struct unwind_stacktrace *trace)
 
         cache = info->cache;
         trace->entries = cache->entries;
-
-        if (cache->nr_entries) {
-                /*
-                 * The user stack has already been previously unwound in this
-                 * entry context. Skip the unwind and use the cache.
-                 */
-                trace->nr = cache->nr_entries;
+        trace->nr = cache->nr_entries;
+        /*
+         * The user stack has already been previously unwound in this
+         * entry context. Skip the unwind and use the cache.
+         */
+        if (trace->nr)
                 return 0;
-        }
-
-        trace->nr = 0;
+
         unwind_user(trace, UNWIND_MAX_ENTRIES);
 
         cache->nr_entries = trace->nr;
 
         /* Clear nr_entries on way back to user space */
-        set_bit(UNWIND_USED_BIT, &info->unwind_mask);
+        atomic_long_or(UNWIND_USED, &info->unwind_mask);
 
         return 0;
 }
@@ -160,7 +159,7 @@ static void process_unwind_deferred(struct task_struct *task)
 
         /* Clear pending bit but make sure to have the current bits */
         bits = atomic_long_fetch_andnot(UNWIND_PENDING,
-                                        (atomic_long_t *)&info->unwind_mask);
+                                        &info->unwind_mask);
 
         /*
          * From here on out, the callback must always be called, even if it's
@@ -231,6 +230,7 @@ void unwind_deferred_task_exit(struct task_struct *task)
 int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 {
         struct unwind_task_info *info = &current->unwind_info;
+        int twa_mode = TWA_RESUME;
         unsigned long old, bits;
         unsigned long bit;
         int ret;
@@ -246,8 +246,11 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
          * Trigger a warning to make it obvious that an architecture
          * is using this in NMI when it should not be.
          */
-        if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi()))
-                return -EINVAL;
+        if (in_nmi()) {
+                if (WARN_ON_ONCE(!CAN_USE_IN_NMI))
+                        return -EINVAL;
+                twa_mode = TWA_NMI_CURRENT;
+        }
 
         /* Do not allow cancelled works to request again */
         bit = READ_ONCE(work->bit);
@@ -261,7 +264,7 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 
         *cookie = get_cookie(info);
 
-        old = READ_ONCE(info->unwind_mask);
+        old = atomic_long_read(&info->unwind_mask);
 
         /* Is this already queued or executed */
         if (old & bit)
@@ -274,7 +277,7 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
          * to have a callback.
          */
         bits = UNWIND_PENDING | bit;
-        old = atomic_long_fetch_or(bits, (atomic_long_t *)&info->unwind_mask);
+        old = atomic_long_fetch_or(bits, &info->unwind_mask);
         if (old & bits) {
                 /*
                  * If the work's bit was set, whatever set it had better
@@ -285,10 +288,10 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
         }
 
         /* The work has been claimed, now schedule it. */
-        ret = task_work_add(current, &info->work, TWA_RESUME);
+        ret = task_work_add(current, &info->work, twa_mode);
 
         if (WARN_ON_ONCE(ret))
-                WRITE_ONCE(info->unwind_mask, 0);
+                atomic_long_set(&info->unwind_mask, 0);
 
         return ret;
 }
@@ -320,7 +323,8 @@ void unwind_deferred_cancel(struct unwind_work *work)
         guard(rcu)();
         /* Clear this bit from all threads */
         for_each_process_thread(g, t) {
-                clear_bit(bit, &t->unwind_info.unwind_mask);
+                atomic_long_andnot(BIT(bit),
+                                   &t->unwind_info.unwind_mask);
                 if (t->unwind_info.cache)
                         clear_bit(bit, &t->unwind_info.cache->unwind_completed);
         }
@@ -350,7 +354,7 @@ void unwind_task_init(struct task_struct *task)
 
         memset(info, 0, sizeof(*info));
         init_task_work(&info->work, unwind_deferred_task_work);
-        info->unwind_mask = 0;
+        atomic_long_set(&info->unwind_mask, 0);
 }
 
 void unwind_task_free(struct task_struct *task)
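The heart of the conversion above is that unwind_mask becomes a single atomic word: one atomic_long_fetch_or() of UNWIND_PENDING plus the caller's work bit both records the request and elects exactly one requester to call task_work_add() (using TWA_NMI_CURRENT when requested from NMI). The sketch below is a minimal user-space illustration of that claim pattern using C11 atomics; it is not kernel code, and the names PENDING, WORK_BIT(), demo_request() and demo_process() are invented for the example.

```c
/*
 * User-space sketch of the "claim with one fetch_or" pattern used on
 * info->unwind_mask in the patch above. Illustrative only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PENDING      (1UL << 0)          /* stands in for UNWIND_PENDING   */
#define WORK_BIT(n)  (1UL << ((n) + 1))  /* stands in for a tracer's bit   */

static atomic_ulong mask;                /* stands in for info->unwind_mask */

/* Request deferred processing of work 'n'; true if the caller must schedule it. */
static bool demo_request(int n)
{
        unsigned long bits = PENDING | WORK_BIT(n);
        unsigned long old  = atomic_fetch_or(&mask, bits);

        if (old & bits)
                return false;   /* already queued, or another caller owns PENDING */

        /* We flipped PENDING, so we alone schedule the deferred callback. */
        return true;
}

/* Deferred side: consume PENDING, then service every work bit seen at that point. */
static void demo_process(void)
{
        unsigned long bits = atomic_fetch_and(&mask, ~PENDING);

        for (int n = 0; n < 8; n++)
                if (bits & WORK_BIT(n))
                        printf("servicing work %d\n", n);
}

int main(void)
{
        printf("must schedule: %d\n", demo_request(0)); /* claims PENDING        */
        printf("must schedule: %d\n", demo_request(0)); /* duplicate request     */
        printf("must schedule: %d\n", demo_request(1)); /* piggybacks on PENDING */
        demo_process();
        return 0;
}
```

Built with e.g. `cc -std=c11 demo.c`, the second request for work 0 and the request for work 1 both report 0: the first caller already owns the pending callback, mirroring the `if (old & bits)` early return in unwind_deferred_request().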
