summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-25 10:33:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-25 10:33:23 -0700
commitd5048d1176b8e76e687fc145df785118424e1ec2 (patch)
tree69d905e72c3fd88c8bdbae14a3b29ac9f6211e37 /include
parent0ae2062ee3ebf11981f9bdb6198fa734d8c0f46c (diff)
parente40d3709c0225f5f681fd300f65a65ac63b10f83 (diff)
Merge tag 'timers-core-2025-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer core updates from Thomas Gleixner: - Fix a memory ordering issue in posix-timers Posix-timer lookup is lockless and reevaluates the timer validity under the timer lock, but the update which validates the timer is not protected by the timer lock. That allows the store to be reordered against the initialization stores, so that the lookup side can observe a partially initialized timer. That's mostly a theoretical problem, but incorrect nevertheless. - Fix a long standing inconsistency of the coarse time getters The coarse time getters read the base time of the current update cycle without reading the actual hardware clock. NTP frequency adjustment can set the base time backwards. The fine grained interfaces compensate this by reading the clock and applying the new conversion factor, but the coarse grained time getters use the base time directly. That allows the user to observe time going backwards. Cure it by always forwarding base time, when NTP changes the frequency with an immediate step. - Rework of posix-timer hashing The posix-timer hash is not scalable and due to the CRIU timer restore mechanism prone to massive contention on the global hash bucket lock. Replace the global hash lock with a fine grained per bucket locking scheme to address that. - Rework the proc/$PID/timers interface. /proc/$PID/timers is provided for CRIU to be able to restore a timer. The printout happens with sighand lock held and interrupts disabled. That's not required as this can be done with RCU protection as well. - Provide a sane mechanism for CRIU to restore a timer ID CRIU restores timers by creating and deleting them until the kernel internal per process ID counter reached the requested ID. That's horribly slow for sparse timer IDs. Provide a prctl() which allows CRIU to restore a timer with a given ID. When enabled the ID pointer is used as input pointer to read the requested ID from user space. When disabled, the normal allocation scheme (next ID) is active as before. This is backwards compatible for both kernel and user space. - Make hrtimer_update_function() less expensive. The sanity checks are valuable, but expensive for high frequency usage in io/uring. Make the debug checks conditional and enable them only when lockdep is enabled. - Small updates, cleanups and improvements * tag 'timers-core-2025-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits) selftests/timers: Improve skew_consistency by testing with other clockids timekeeping: Fix possible inconsistencies in _COARSE clockids posix-timers: Drop redundant memset() invocation selftests/timers/posix-timers: Add a test for exact allocation mode posix-timers: Provide a mechanism to allocate a given timer ID posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held posix-timers: Make per process list RCU safe posix-timers: Avoid false cacheline sharing posix-timers: Switch to jhash32() posix-timers: Improve hash table performance posix-timers: Make signal_struct:: Next_posix_timer_id an atomic_t posix-timers: Make lock_timer() use guard() posix-timers: Rework timer removal posix-timers: Simplify lock/unlock_timer() posix-timers: Use guards in a few places posix-timers: Remove SLAB_PANIC from kmem cache posix-timers: Remove a few paranoid warnings posix-timers: Cleanup includes posix-timers: Add cond_resched() to posix_timer_add() search loop posix-timers: Initialise timer before adding it to the hash table ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/cleanup.h22
-rw-r--r--include/linux/hrtimer.h3
-rw-r--r--include/linux/posix-timers.h30
-rw-r--r--include/linux/sched/signal.h3
-rw-r--r--include/uapi/linux/prctl.h11
5 files changed, 50 insertions, 19 deletions
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index f7750eab1832..2b32a5759b22 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -308,11 +308,21 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \
static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
-#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
+#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \
+ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
+ { return (void *)(__force unsigned long)*(_exp); }
+
+#define DEFINE_CLASS_IS_GUARD(_name) \
__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
+ __DEFINE_GUARD_LOCK_PTR(_name, _T)
+
+#define DEFINE_CLASS_IS_COND_GUARD(_name) \
+ __DEFINE_CLASS_IS_CONDITIONAL(_name, true); \
+ __DEFINE_GUARD_LOCK_PTR(_name, _T)
+
+#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
- static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
- { return (void *)(__force unsigned long)*_T; }
+ DEFINE_CLASS_IS_GUARD(_name)
#define DEFINE_GUARD_COND(_name, _ext, _condlock) \
__DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \
@@ -392,11 +402,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \
if (_T->lock) { _unlock; } \
} \
\
-static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \
-{ \
- return (void *)(__force unsigned long)_T->lock; \
-}
-
+__DEFINE_GUARD_LOCK_PTR(_name, &_T->lock)
#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \
static inline class_##_name##_t class_##_name##_constructor(_type *l) \
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f7bfdcf0dda3..bdd55c1f0d73 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -333,6 +333,7 @@ static inline int hrtimer_callback_running(struct hrtimer *timer)
static inline void hrtimer_update_function(struct hrtimer *timer,
enum hrtimer_restart (*function)(struct hrtimer *))
{
+#ifdef CONFIG_PROVE_LOCKING
guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock);
if (WARN_ON_ONCE(hrtimer_is_queued(timer)))
@@ -340,7 +341,7 @@ static inline void hrtimer_update_function(struct hrtimer *timer,
if (WARN_ON_ONCE(!function))
return;
-
+#endif
timer->function = function;
}
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index f11f10c97bd9..dd48c64b605e 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -114,6 +114,7 @@ bool posixtimer_init_sigqueue(struct sigqueue *q);
void posixtimer_send_sigqueue(struct k_itimer *tmr);
bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq);
void posixtimer_free_timer(struct k_itimer *timer);
+long posixtimer_create_prctl(unsigned long ctrl);
/* Init task static initializer */
#define INIT_CPU_TIMERBASE(b) { \
@@ -140,6 +141,7 @@ static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info,
struct sigqueue *timer_sigq) { return false; }
static inline void posixtimer_free_timer(struct k_itimer *timer) { }
+static inline long posixtimer_create_prctl(unsigned long ctrl) { return -EINVAL; }
#endif
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
@@ -177,23 +179,26 @@ static inline void posix_cputimers_init_work(void) { }
* @rcu: RCU head for freeing the timer.
*/
struct k_itimer {
- struct hlist_node list;
- struct hlist_node ignored_list;
+ /* 1st cacheline contains read-mostly fields */
struct hlist_node t_hash;
- spinlock_t it_lock;
- const struct k_clock *kclock;
- clockid_t it_clock;
+ struct hlist_node list;
timer_t it_id;
+ clockid_t it_clock;
+ int it_sigev_notify;
+ enum pid_type it_pid_type;
+ struct signal_struct *it_signal;
+ const struct k_clock *kclock;
+
+ /* 2nd cacheline and above contain fields which are modified regularly */
+ spinlock_t it_lock;
int it_status;
bool it_sig_periodic;
s64 it_overrun;
s64 it_overrun_last;
unsigned int it_signal_seq;
unsigned int it_sigqueue_seq;
- int it_sigev_notify;
- enum pid_type it_pid_type;
ktime_t it_interval;
- struct signal_struct *it_signal;
+ struct hlist_node ignored_list;
union {
struct pid *it_pid;
struct task_struct *it_process;
@@ -210,7 +215,7 @@ struct k_itimer {
} alarm;
} it;
struct rcu_head rcu;
-};
+} ____cacheline_aligned_in_smp;
void run_posix_cpu_timers(void);
void posix_cpu_timers_exit(struct task_struct *task);
@@ -240,6 +245,13 @@ static inline void posixtimer_sigqueue_putref(struct sigqueue *q)
posixtimer_putref(tmr);
}
+
+static inline bool posixtimer_valid(const struct k_itimer *timer)
+{
+ unsigned long val = (unsigned long)timer->it_signal;
+
+ return !(val & 0x1UL);
+}
#else /* CONFIG_POSIX_TIMERS */
static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d5d03d919df8..1ef1edbaaf79 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -136,7 +136,8 @@ struct signal_struct {
#ifdef CONFIG_POSIX_TIMERS
/* POSIX.1b Interval Timers */
- unsigned int next_posix_timer_id;
+ unsigned int timer_create_restore_ids:1;
+ atomic_t next_posix_timer_id;
struct hlist_head posix_timers;
struct hlist_head ignored_posix_timers;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 5c6080680cb2..15c18ef4eb11 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -353,4 +353,15 @@ struct prctl_mm_map {
*/
#define PR_LOCK_SHADOW_STACK_STATUS 76
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+
#endif /* _LINUX_PRCTL_H */