diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 10:33:23 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 10:33:23 -0700 |
| commit | d5048d1176b8e76e687fc145df785118424e1ec2 (patch) | |
| tree | 69d905e72c3fd88c8bdbae14a3b29ac9f6211e37 /include | |
| parent | 0ae2062ee3ebf11981f9bdb6198fa734d8c0f46c (diff) | |
| parent | e40d3709c0225f5f681fd300f65a65ac63b10f83 (diff) | |
Merge tag 'timers-core-2025-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer core updates from Thomas Gleixner:
- Fix a memory ordering issue in posix-timers
Posix-timer lookup is lockless and reevaluates the timer validity
under the timer lock, but the update which validates the timer is not
protected by the timer lock. That allows the store to be reordered
against the initialization stores, so that the lookup side can
observe a partially initialized timer. That's mostly a theoretical
problem, but incorrect nevertheless.
- Fix a long standing inconsistency of the coarse time getters
The coarse time getters read the base time of the current update
cycle without reading the actual hardware clock. NTP frequency
adjustment can set the base time backwards. The fine grained
interfaces compensate this by reading the clock and applying the new
conversion factor, but the coarse grained time getters use the base
time directly. That allows the user to observe time going backwards.
Cure it by always forwarding base time, when NTP changes the
frequency with an immediate step.
- Rework of posix-timer hashing
The posix-timer hash is not scalable and due to the CRIU timer
restore mechanism prone to massive contention on the global hash
bucket lock.
Replace the global hash lock with a fine grained per bucket locking
scheme to address that.
- Rework the proc/$PID/timers interface.
/proc/$PID/timers is provided for CRIU to be able to restore a timer.
The printout happens with sighand lock held and interrupts disabled.
That's not required as this can be done with RCU protection as well.
- Provide a sane mechanism for CRIU to restore a timer ID
CRIU restores timers by creating and deleting them until the kernel
internal per process ID counter reached the requested ID. That's
horribly slow for sparse timer IDs.
Provide a prctl() which allows CRIU to restore a timer with a given
ID. When enabled the ID pointer is used as input pointer to read the
requested ID from user space. When disabled, the normal allocation
scheme (next ID) is active as before. This is backwards compatible
for both kernel and user space.
- Make hrtimer_update_function() less expensive.
The sanity checks are valuable, but expensive for high frequency
usage in io/uring. Make the debug checks conditional and enable them
only when lockdep is enabled.
- Small updates, cleanups and improvements
* tag 'timers-core-2025-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
selftests/timers: Improve skew_consistency by testing with other clockids
timekeeping: Fix possible inconsistencies in _COARSE clockids
posix-timers: Drop redundant memset() invocation
selftests/timers/posix-timers: Add a test for exact allocation mode
posix-timers: Provide a mechanism to allocate a given timer ID
posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held
posix-timers: Make per process list RCU safe
posix-timers: Avoid false cacheline sharing
posix-timers: Switch to jhash32()
posix-timers: Improve hash table performance
posix-timers: Make signal_struct:: Next_posix_timer_id an atomic_t
posix-timers: Make lock_timer() use guard()
posix-timers: Rework timer removal
posix-timers: Simplify lock/unlock_timer()
posix-timers: Use guards in a few places
posix-timers: Remove SLAB_PANIC from kmem cache
posix-timers: Remove a few paranoid warnings
posix-timers: Cleanup includes
posix-timers: Add cond_resched() to posix_timer_add() search loop
posix-timers: Initialise timer before adding it to the hash table
...
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/cleanup.h | 22 | ||||
| -rw-r--r-- | include/linux/hrtimer.h | 3 | ||||
| -rw-r--r-- | include/linux/posix-timers.h | 30 | ||||
| -rw-r--r-- | include/linux/sched/signal.h | 3 | ||||
| -rw-r--r-- | include/uapi/linux/prctl.h | 11 |
5 files changed, 50 insertions, 19 deletions
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index f7750eab1832..2b32a5759b22 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -308,11 +308,21 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond -#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ +#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return (void *)(__force unsigned long)*(_exp); } + +#define DEFINE_CLASS_IS_GUARD(_name) \ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ + __DEFINE_GUARD_LOCK_PTR(_name, _T) + +#define DEFINE_CLASS_IS_COND_GUARD(_name) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name, true); \ + __DEFINE_GUARD_LOCK_PTR(_name, _T) + +#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ - static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return (void *)(__force unsigned long)*_T; } + DEFINE_CLASS_IS_GUARD(_name) #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ @@ -392,11 +402,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ if (_T->lock) { _unlock; } \ } \ \ -static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ -{ \ - return (void *)(__force unsigned long)_T->lock; \ -} - +__DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) #define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \ static inline class_##_name##_t class_##_name##_constructor(_type *l) \ diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f7bfdcf0dda3..bdd55c1f0d73 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -333,6 +333,7 @@ static inline int hrtimer_callback_running(struct hrtimer *timer) static inline void hrtimer_update_function(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *)) { +#ifdef CONFIG_PROVE_LOCKING guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock); if (WARN_ON_ONCE(hrtimer_is_queued(timer))) @@ -340,7 +341,7 @@ static inline void hrtimer_update_function(struct hrtimer *timer, if (WARN_ON_ONCE(!function)) return; - +#endif timer->function = function; } diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index f11f10c97bd9..dd48c64b605e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -114,6 +114,7 @@ bool posixtimer_init_sigqueue(struct sigqueue *q); void posixtimer_send_sigqueue(struct k_itimer *tmr); bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq); void posixtimer_free_timer(struct k_itimer *timer); +long posixtimer_create_prctl(unsigned long ctrl); /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ @@ -140,6 +141,7 @@ static inline void posixtimer_rearm_itimer(struct task_struct *p) { } static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq) { return false; } static inline void posixtimer_free_timer(struct k_itimer *timer) { } +static inline long posixtimer_create_prctl(unsigned long ctrl) { return -EINVAL; } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK @@ -177,23 +179,26 @@ static inline void posix_cputimers_init_work(void) { } * @rcu: RCU head for freeing the timer. */ struct k_itimer { - struct hlist_node list; - struct hlist_node ignored_list; + /* 1st cacheline contains read-mostly fields */ struct hlist_node t_hash; - spinlock_t it_lock; - const struct k_clock *kclock; - clockid_t it_clock; + struct hlist_node list; timer_t it_id; + clockid_t it_clock; + int it_sigev_notify; + enum pid_type it_pid_type; + struct signal_struct *it_signal; + const struct k_clock *kclock; + + /* 2nd cacheline and above contain fields which are modified regularly */ + spinlock_t it_lock; int it_status; bool it_sig_periodic; s64 it_overrun; s64 it_overrun_last; unsigned int it_signal_seq; unsigned int it_sigqueue_seq; - int it_sigev_notify; - enum pid_type it_pid_type; ktime_t it_interval; - struct signal_struct *it_signal; + struct hlist_node ignored_list; union { struct pid *it_pid; struct task_struct *it_process; @@ -210,7 +215,7 @@ struct k_itimer { } alarm; } it; struct rcu_head rcu; -}; +} ____cacheline_aligned_in_smp; void run_posix_cpu_timers(void); void posix_cpu_timers_exit(struct task_struct *task); @@ -240,6 +245,13 @@ static inline void posixtimer_sigqueue_putref(struct sigqueue *q) posixtimer_putref(tmr); } + +static inline bool posixtimer_valid(const struct k_itimer *timer) +{ + unsigned long val = (unsigned long)timer->it_signal; + + return !(val & 0x1UL); +} #else /* CONFIG_POSIX_TIMERS */ static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { } static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { } diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index d5d03d919df8..1ef1edbaaf79 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -136,7 +136,8 @@ struct signal_struct { #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ - unsigned int next_posix_timer_id; + unsigned int timer_create_restore_ids:1; + atomic_t next_posix_timer_id; struct hlist_head posix_timers; struct hlist_head ignored_posix_timers; diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 5c6080680cb2..15c18ef4eb11 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -353,4 +353,15 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + #endif /* _LINUX_PRCTL_H */ |
