From 511885d7061eda3eb1faf3f57dcc936ff75863f1 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 24 Jul 2019 08:23:23 -0700 Subject: lib/timerqueue: Rely on rbtree semantics for next timer Simplify the timerqueue code by using cached rbtrees and rely on the tree leftmost node semantics to get the timer with earliest expiration time. This is a drop in conversion, and therefore semantics remain untouched. The runtime overhead of cached rbtrees is be pretty much the same as the current head->next method, noting that when removing the leftmost node, a common operation for the timerqueue, the rb_next(leftmost) is O(1) as well, so the next timer will either be the right node or its parent. Therefore no extra pointer chasing. Finally, the size of the struct timerqueue_head remains the same. Passes several hours of rcutorture. Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190724152323.bojciei3muvfxalm@linux-r8p5 --- include/linux/timerqueue.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index 78b8cc73f12f..aff122f1062a 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -12,8 +12,7 @@ struct timerqueue_node { }; struct timerqueue_head { - struct rb_root head; - struct timerqueue_node *next; + struct rb_root_cached rb_root; }; @@ -29,13 +28,14 @@ extern struct timerqueue_node *timerqueue_iterate_next( * * @head: head of timerqueue * - * Returns a pointer to the timer node that has the - * earliest expiration time. + * Returns a pointer to the timer node that has the earliest expiration time. */ static inline struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) { - return head->next; + struct rb_node *leftmost = rb_first_cached(&head->rb_root); + + return rb_entry(leftmost, struct timerqueue_node, node); } static inline void timerqueue_init(struct timerqueue_node *node) @@ -45,7 +45,6 @@ static inline void timerqueue_init(struct timerqueue_node *node) static inline void timerqueue_init_head(struct timerqueue_head *head) { - head->head = RB_ROOT; - head->next = NULL; + head->rb_root = RB_ROOT_CACHED; } #endif /* _LINUX_TIMERQUEUE_H */ -- cgit v1.2.3 From b74494872555d1f7888dfd9225700a363f4a84fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Jul 2019 20:30:49 +0200 Subject: hrtimer: Remove task argument from hrtimer_init_sleeper() All callers hand in 'current' and that's the only task pointer which actually makes sense. Remove the task argument and set current in the function. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185752.791885290@linutronix.de --- include/linux/hrtimer.h | 3 +-- include/linux/wait.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4971100a8cab..3c74f89367c4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -463,8 +463,7 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); -extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, - struct task_struct *tsk); +extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl); extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode); diff --git a/include/linux/wait.h b/include/linux/wait.h index b6f77cf60dd7..d57832774ca6 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -489,7 +489,7 @@ do { \ struct hrtimer_sleeper __t; \ \ hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); \ - hrtimer_init_sleeper(&__t, current); \ + hrtimer_init_sleeper(&__t); \ if ((timeout) != KTIME_MAX) \ hrtimer_start_range_ns(&__t.timer, timeout, \ current->timer_slack_ns, \ -- cgit v1.2.3 From dbc1625fc9deefb352f6ff26a575ae4b3ddef23a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Jul 2019 20:30:50 +0200 Subject: hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls hrtimer_init_sleeper() calls require prior initialisation of the hrtimer object which is embedded into the hrtimer_sleeper. Combine the initialization and spare a function call. Fixup all call sites. This is also a preparatory change for PREEMPT_RT to do hrtimer sleeper specific initializations of the embedded hrtimer without modifying any of the call sites. No functional change. [ anna-maria: Minor cleanups ] [ tglx: Adopted to the removal of the task argument of hrtimer_init_sleeper() and trivial polishing. Folded a fix from Stephen Rothwell for the vsoc code ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185752.887468908@linutronix.de --- include/linux/hrtimer.h | 17 ++++++++++++++--- include/linux/wait.h | 4 ++-- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 3c74f89367c4..0df373bed3d7 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -347,10 +347,15 @@ DECLARE_PER_CPU(struct tick_device, tick_cpu_device); /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); +extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, + enum hrtimer_mode mode); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); +extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, + clockid_t clock_id, + enum hrtimer_mode mode); extern void destroy_hrtimer_on_stack(struct hrtimer *timer); #else @@ -360,6 +365,14 @@ static inline void hrtimer_init_on_stack(struct hrtimer *timer, { hrtimer_init(timer, which_clock, mode); } + +static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, + clockid_t clock_id, + enum hrtimer_mode mode) +{ + hrtimer_init_sleeper(sl, clock_id, mode); +} + static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif @@ -463,10 +476,8 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); -extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl); - extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, - const enum hrtimer_mode mode); + const enum hrtimer_mode mode); extern int schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, diff --git a/include/linux/wait.h b/include/linux/wait.h index d57832774ca6..4707543ef575 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -488,8 +488,8 @@ do { \ int __ret = 0; \ struct hrtimer_sleeper __t; \ \ - hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); \ - hrtimer_init_sleeper(&__t); \ + hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ + HRTIMER_MODE_REL); \ if ((timeout) != KTIME_MAX) \ hrtimer_start_range_ns(&__t.timer, timeout, \ current->timer_slack_ns, \ -- cgit v1.2.3 From 01656464fce946f70b02a84ab218e562ceb1662e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 Jul 2019 21:03:53 +0200 Subject: hrtimer: Provide hrtimer_sleeper_start_expires() hrtimer_sleepers will gain a scheduling class dependent treatment on PREEMPT_RT. Create a wrapper around hrtimer_start_expires() to make that possible. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0df373bed3d7..24072a0942c0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -408,6 +408,9 @@ static inline void hrtimer_start_expires(struct hrtimer *timer, hrtimer_start_range_ns(timer, soft, delta, mode); } +void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, + enum hrtimer_mode mode); + static inline void hrtimer_restart(struct hrtimer *timer) { hrtimer_start_expires(timer, HRTIMER_MODE_ABS); -- cgit v1.2.3 From ae6683d815895c2be1e60e1942630fa99488055b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Jul 2019 20:30:51 +0200 Subject: hrtimer: Introduce HARD expiry mode On PREEMPT_RT not all hrtimers can be expired in hard interrupt context even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer most hrtimers' expiry into soft interrupt context. But there are hrtimers which must be expired in hard interrupt context even when PREEMPT_RT is enabled: - hrtimers which must expiry in hard interrupt context, e.g. scheduler, perf, watchdog related hrtimers - latency critical hrtimers, e.g. nanosleep, ..., kvm lapic timer Add a new mode flag HRTIMER_MODE_HARD which allows to mark these timers so PREEMPT_RT will not move them into softirq expiry mode. [ tglx: Split out of a larger combo patch. Added changelog ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185752.981398465@linutronix.de --- include/linux/hrtimer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 24072a0942c0..15c2ba6b6316 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -38,6 +38,7 @@ enum hrtimer_mode { HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, + HRTIMER_MODE_HARD = 0x08, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, @@ -48,6 +49,11 @@ enum hrtimer_mode { HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, + HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, + HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, + + HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, + HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, }; /* -- cgit v1.2.3 From 0ab6a3ddbad40ef5b6b8c2353fd53fa4ecf9c479 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 Jul 2019 20:15:25 +0200 Subject: hrtimer: Make enqueue mode check work on RT hrtimer_start_range_ns() has a WARN_ONCE() which verifies that a timer which is marker for softirq expiry is not queued in the hard interrupt base and vice versa. When PREEMPT_RT is enabled, timers which are not explicitely marked to expire in hard interrupt context are deferrred to the soft interrupt. So the regular check would trigger. Change the check, so when PREEMPT_RT is enabled, it is verified that the timers marked for hard interrupt expiry are not tried to be queued for soft interrupt expiry or any of the unmarked and softirq marked is tried to be expired in hard interrupt context. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 15c2ba6b6316..7d0d0a36a8f4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -107,6 +107,8 @@ enum hrtimer_restart { * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative * @is_soft: Set if hrtimer will be expired in soft interrupt context. + * @is_hard: Set if hrtimer will be expired in hard interrupt context + * even on RT. * * The hrtimer structure must be initialized by hrtimer_init() */ @@ -118,6 +120,7 @@ struct hrtimer { u8 state; u8 is_rel; u8 is_soft; + u8 is_hard; }; /** -- cgit v1.2.3 From f61eff83cec9cfab31fd30a2ca8856be379cdcd5 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Fri, 26 Jul 2019 20:30:59 +0200 Subject: hrtimer: Prepare support for PREEMPT_RT When PREEMPT_RT is enabled, the soft interrupt thread can be preempted. If the soft interrupt thread is preempted in the middle of a timer callback, then calling hrtimer_cancel() can lead to two issues: - If the caller is on a remote CPU then it has to spin wait for the timer handler to complete. This can result in unbound priority inversion. - If the caller originates from the task which preempted the timer handler on the same CPU, then spin waiting for the timer handler to complete is never going to end. To avoid these issues, add a new lock to the timer base which is held around the execution of the timer callbacks. If hrtimer_cancel() detects that the timer callback is currently running, it blocks on the expiry lock. When the callback is finished, the expiry lock is dropped by the softirq thread which wakes up the waiter and the system makes progress. This addresses both the priority inversion and the life lock issues. The same issue can happen in virtual machines when the vCPU which runs a timer callback is scheduled out. If a second vCPU of the same guest calls hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back in. The expiry lock mechanism would avoid that. It'd be trivial to enable this when paravirt spinlocks are enabled in a guest, but it's not clear whether this is an actual problem in the wild, so for now it's an RT only mechanism. [ tglx: Refactored it for mainline ] Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185753.737767218@linutronix.de --- include/linux/hrtimer.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 7d0d0a36a8f4..5df4bcff96d5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -192,6 +192,10 @@ enum hrtimer_base_type { * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are + * expired + * @timer_waiters: A hrtimer_cancel() invocation waits for the timer + * callback to finish. * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue; it is the total first expiry time (hard * and soft hrtimer are taken into account) @@ -218,6 +222,10 @@ struct hrtimer_cpu_base { unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; +#endif +#ifdef CONFIG_PREEMPT_RT + spinlock_t softirq_expiry_lock; + atomic_t timer_waiters; #endif ktime_t expires_next; struct hrtimer *next_timer; @@ -350,6 +358,14 @@ extern void hrtimers_resume(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); +#ifdef CONFIG_PREEMPT_RT +void hrtimer_cancel_wait_running(const struct hrtimer *timer); +#else +static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) +{ + cpu_relax(); +} +#endif /* Exported timer functions: */ -- cgit v1.2.3 From 030dcdd197d77374879bb5603d091eee7d8aba80 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Fri, 26 Jul 2019 20:31:00 +0200 Subject: timers: Prepare support for PREEMPT_RT When PREEMPT_RT is enabled, the soft interrupt thread can be preempted. If the soft interrupt thread is preempted in the middle of a timer callback, then calling del_timer_sync() can lead to two issues: - If the caller is on a remote CPU then it has to spin wait for the timer handler to complete. This can result in unbound priority inversion. - If the caller originates from the task which preempted the timer handler on the same CPU, then spin waiting for the timer handler to complete is never going to end. To avoid these issues, add a new lock to the timer base which is held around the execution of the timer callbacks. If del_timer_sync() detects that the timer callback is currently running, it blocks on the expiry lock. When the callback is finished, the expiry lock is dropped by the softirq thread which wakes up the waiter and the system makes progress. This addresses both the priority inversion and the life lock issues. This mechanism is not used for timers which are marked IRQSAFE as for those preemption is disabled accross the callback and therefore this situation cannot happen. The callbacks for such timers need to be individually audited for RT compliance. The same issue can happen in virtual machines when the vCPU which runs a timer callback is scheduled out. If a second vCPU of the same guest calls del_timer_sync() it will spin wait for the other vCPU to be scheduled back in. The expiry lock mechanism would avoid that. It'd be trivial to enable this when paravirt spinlocks are enabled in a guest, but it's not clear whether this is an actual problem in the wild, so for now it's an RT only mechanism. As the softirq thread can be preempted with PREEMPT_RT=y, the SMP variant of del_timer_sync() needs to be used on UP as well. [ tglx: Refactored it for mainline ] Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185753.832418500@linutronix.de --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 282e4f2a532a..1e6650ed066d 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -183,7 +183,7 @@ extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) extern int del_timer_sync(struct timer_list *timer); #else # define del_timer_sync(t) del_timer(t) -- cgit v1.2.3 From 5d99b32a009e900a561f6a42ea7afe5b21288b8a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 31 Jul 2019 00:33:54 +0200 Subject: posix-timers: Move rcu_head out of it union Timer deletion on PREEMPT_RT is prone to priority inversion and live locks. The hrtimer code has a synchronization mechanism for this. Posix CPU timers will grow one. But that mechanism cannot be invoked while holding the k_itimer lock because that can deadlock against the running timer callback. So the lock must be dropped which allows the timer to be freed. The timer free can be prevented by taking RCU readlock before dropping the lock, but because the rcu_head is part of the 'it' union a concurrent free will overwrite the hrtimer on which the task is trying to synchronize. Move the rcu_head out of the union to prevent this. [ tglx: Fixed up kernel-doc. Rewrote changelog ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190730223828.965541887@linutronix.de --- include/linux/posix-timers.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index b20798fc5191..604cec0e41ba 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -85,7 +85,8 @@ static inline int clockid_to_fd(const clockid_t clk) * @it_process: The task to wakeup on clock_nanosleep (CPU timers) * @sigq: Pointer to preallocated sigqueue * @it: Union representing the various posix timer type - * internals. Also used for rcu freeing the timer. + * internals. + * @rcu: RCU head for freeing the timer. */ struct k_itimer { struct list_head list; @@ -114,8 +115,8 @@ struct k_itimer { struct { struct alarm alarmtimer; } alarm; - struct rcu_head rcu; } it; + struct rcu_head rcu; }; void run_posix_cpu_timers(struct task_struct *task); -- cgit v1.2.3 From ce03f613461642669d6150c405dd28f4bfd54bbb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 Aug 2019 16:31:42 +0200 Subject: posix-timers: Cleanup forward declarations and includes - Rename struct siginfo to kernel_siginfo as that is used and required - Add a forward declaration for task_struct and remove sched.h include - Remove timex.h include as it is not needed Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190819143801.472005793@linutronix.de --- include/linux/posix-timers.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 604cec0e41ba..26c636d1485b 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -4,11 +4,10 @@ #include #include -#include -#include #include -struct siginfo; +struct kernel_siginfo; +struct task_struct; struct cpu_timer_list { struct list_head entry; -- cgit v1.2.3 From 3758b0f86ef502e2f342055caef6d2232c2558b7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 Aug 2019 16:31:43 +0200 Subject: alarmtimers: Avoid rtc.h include rtc.h is not needed in alarmtimers when a forward declaration of struct rtc_device is provided. That allows to include posix-timers.h without adding more includes to alarmtimer.h or creating circular include dependencies. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190819143801.565389536@linutronix.de --- include/linux/alarmtimer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 0760ca1cb009..74748e306f4b 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -5,7 +5,8 @@ #include #include #include -#include + +struct rtc_device; enum alarmtimer_type { ALARM_REALTIME, -- cgit v1.2.3 From dce3e8fd039cc1b62760b3ad6822cf04c262cd0e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 Aug 2019 16:31:47 +0200 Subject: posix-cpu-timers: Remove tsk argument from run_posix_cpu_timers() It's always current. Don't give people wrong ideas. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190819143801.945469967@linutronix.de --- include/linux/posix-timers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 26c636d1485b..033374b99767 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -118,7 +118,7 @@ struct k_itimer { struct rcu_head rcu; }; -void run_posix_cpu_timers(struct task_struct *task); +void run_posix_cpu_timers(void); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, -- cgit v1.2.3 From 19298fbf453c90a6cf72288155f80c6f55e9139d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:08:51 +0200 Subject: posix-cpu-timers: Provide quick sample function for itimer get_itimer() needs a sample of the current thread group cputime. It invokes thread_group_cputimer() - which is a misnomer. That function also starts eventually the group cputime accouting which is bogus because the accounting is already active when a timer is armed. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192919.599658199@linutronix.de --- include/linux/sched/cputime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 53f883f5a2fd..6de7b384d2be 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -62,7 +62,7 @@ extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); - +void thread_group_sample_cputime(struct task_struct *tsk, struct task_cputime *times); /* * The following are functions that support scheduler-internal time accounting. -- cgit v1.2.3 From c506bef424ca282f2ad357e86fee940c69018974 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:08:54 +0200 Subject: posix-cpu-timers: Rename thread_group_cputimer() and make it static thread_group_cputimer() is a complete misnomer. The function does two things: - For arming process wide timers it makes sure that the atomic time storage is up to date. If no cpu timer is armed yet, then the atomic time storage is not updated by the scheduler for performance reasons. In that case a full summing up of all threads needs to be done and the update needs to be enabled. - Samples the current time into the caller supplied storage. Rename it to thread_group_start_cputime(), make it static and fixup the callsite. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192919.869350319@linutronix.de --- include/linux/sched/cputime.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 6de7b384d2be..2638fd0ab3f2 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -61,7 +61,6 @@ extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, * Thread group CPU time accounting. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); void thread_group_sample_cputime(struct task_struct *tsk, struct task_cputime *times); /* -- cgit v1.2.3 From 2b69942f9021bf75bd1b001f53bd2578361fadf3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:04 +0200 Subject: posix-cpu-timers: Create a container struct Per task/process data of posix CPU timers is all over the place which makes the code hard to follow and requires ifdeffery. Create a container to hold all this information in one place, so data is consolidated and the ifdeffery can be confined to the posix timer header file and removed from places like fork. As a first step, move the cpu_timers list head array into the new struct and clean up the initializers and simplify fork. The remaining #ifdef in fork will be removed later. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192920.819418976@linutronix.de --- include/linux/init_task.h | 11 ----------- include/linux/posix-timers.h | 34 ++++++++++++++++++++++++++++++++++ include/linux/sched.h | 4 +++- include/linux/sched/signal.h | 5 +++-- 4 files changed, 40 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6049baa5b8bc..2c620d7ac432 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -36,17 +36,6 @@ extern struct cred init_cred; #define INIT_PREV_CPUTIME(x) #endif -#ifdef CONFIG_POSIX_TIMERS -#define INIT_CPU_TIMERS(s) \ - .cpu_timers = { \ - LIST_HEAD_INIT(s.cpu_timers[0]), \ - LIST_HEAD_INIT(s.cpu_timers[1]), \ - LIST_HEAD_INIT(s.cpu_timers[2]), \ - }, -#else -#define INIT_CPU_TIMERS(s) -#endif - #define INIT_TASK_COMM "swapper" /* Attach to the init_task data structure for proper alignment */ diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 033374b99767..cdef89750b2c 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -62,6 +62,40 @@ static inline int clockid_to_fd(const clockid_t clk) return ~(clk >> 3); } +#ifdef CONFIG_POSIX_TIMERS +/** + * posix_cputimers - Container for posix CPU timer related data + * @cpu_timers: List heads to queue posix CPU timers + * + * Used in task_struct and signal_struct + */ +struct posix_cputimers { + struct list_head cpu_timers[CPUCLOCK_MAX]; +}; + +static inline void posix_cputimers_init(struct posix_cputimers *pct) +{ + INIT_LIST_HEAD(&pct->cpu_timers[0]); + INIT_LIST_HEAD(&pct->cpu_timers[1]); + INIT_LIST_HEAD(&pct->cpu_timers[2]); +} + +/* Init task static initializer */ +#define INIT_CPU_TIMERLISTS(c) { \ + LIST_HEAD_INIT(c.cpu_timers[0]), \ + LIST_HEAD_INIT(c.cpu_timers[1]), \ + LIST_HEAD_INIT(c.cpu_timers[2]), \ +} + +#define INIT_CPU_TIMERS(s) \ + .posix_cputimers = { \ + .cpu_timers = INIT_CPU_TIMERLISTS(s.posix_cputimers), \ + }, +#else +struct posix_cputimers { }; +#define INIT_CPU_TIMERS(s) +#endif + #define REQUEUE_PENDING 1 /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 8dc1811487f5..fde844a3b86e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -28,6 +28,7 @@ #include #include #include +#include #include /* task_struct member predeclarations (sorted alphabetically): */ @@ -878,8 +879,9 @@ struct task_struct { #ifdef CONFIG_POSIX_TIMERS struct task_cputime cputime_expires; - struct list_head cpu_timers[3]; #endif + /* Empty if CONFIG_POSIX_CPUTIMERS=n */ + struct posix_cputimers posix_cputimers; /* Process credentials: */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index efd8ce7675ed..88fbb3f1c375 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -9,6 +9,7 @@ #include #include #include +#include /* * Types defining task->signal and task->sighand and APIs using them: @@ -151,9 +152,9 @@ struct signal_struct { /* Earliest-expiration cache. */ struct task_cputime cputime_expires; - struct list_head cpu_timers[3]; - #endif + /* Empty if CONFIG_POSIX_TIMERS=n */ + struct posix_cputimers posix_cputimers; /* PID/PID hash table linkage. */ struct pid *pids[PIDTYPE_MAX]; -- cgit v1.2.3 From 9eacb5c7e6607aba00a7322b21cad83fc8b101c8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:05 +0200 Subject: sched: Move struct task_cputime to types.h For upcoming posix-timer changes to avoid include recursion hell. Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190821192920.909530418@linutronix.de --- include/linux/sched.h | 17 +---------------- include/linux/sched/types.h | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 16 deletions(-) create mode 100644 include/linux/sched/types.h (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index fde844a3b86e..37c39df9b186 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -245,22 +246,6 @@ struct prev_cputime { #endif }; -/** - * struct task_cputime - collected CPU time counts - * @utime: time spent in user mode, in nanoseconds - * @stime: time spent in kernel mode, in nanoseconds - * @sum_exec_runtime: total time spent on the CPU, in nanoseconds - * - * This structure groups together three kinds of CPU time that are tracked for - * threads and thread groups. Most things considering CPU time want to group - * these counts together and treat all three of them in parallel. - */ -struct task_cputime { - u64 utime; - u64 stime; - unsigned long long sum_exec_runtime; -}; - /* Alternate field names when used on cache expirations: */ #define virt_exp utime #define prof_exp stime diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h new file mode 100644 index 000000000000..2c5c28ddd9b2 --- /dev/null +++ b/include/linux/sched/types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_TYPES_H +#define _LINUX_SCHED_TYPES_H + +#include + +/** + * struct task_cputime - collected CPU time counts + * @utime: time spent in user mode, in nanoseconds + * @stime: time spent in kernel mode, in nanoseconds + * @sum_exec_runtime: total time spent on the CPU, in nanoseconds + * + * This structure groups together three kinds of CPU time that are tracked for + * threads and thread groups. Most things considering CPU time want to group + * these counts together and treat all three of them in parallel. + */ +struct task_cputime { + u64 utime; + u64 stime; + unsigned long long sum_exec_runtime; +}; + +#endif -- cgit v1.2.3 From 3a245c0f110e2bfcf7f2cd2248a29005c78999e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:06 +0200 Subject: posix-cpu-timers: Move expiry cache into struct posix_cputimers The expiry cache belongs into the posix_cputimers container where the other cpu timers information is. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192921.014444012@linutronix.de --- include/linux/posix-timers.h | 22 ++++++++++++++++++++++ include/linux/sched.h | 8 -------- include/linux/sched/signal.h | 3 --- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index cdef89750b2c..a3731ba15bce 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -62,24 +62,43 @@ static inline int clockid_to_fd(const clockid_t clk) return ~(clk >> 3); } +/* + * Alternate field names for struct task_cputime when used on cache + * expirations. Will go away soon. + */ +#define virt_exp utime +#define prof_exp stime +#define sched_exp sum_exec_runtime + #ifdef CONFIG_POSIX_TIMERS /** * posix_cputimers - Container for posix CPU timer related data + * @cputime_expires: Earliest-expiration cache * @cpu_timers: List heads to queue posix CPU timers * * Used in task_struct and signal_struct */ struct posix_cputimers { + struct task_cputime cputime_expires; struct list_head cpu_timers[CPUCLOCK_MAX]; }; static inline void posix_cputimers_init(struct posix_cputimers *pct) { + memset(&pct->cputime_expires, 0, sizeof(pct->cputime_expires)); INIT_LIST_HEAD(&pct->cpu_timers[0]); INIT_LIST_HEAD(&pct->cpu_timers[1]); INIT_LIST_HEAD(&pct->cpu_timers[2]); } +void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit); + +static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, + u64 runtime) +{ + pct->cputime_expires.sched_exp = runtime; +} + /* Init task static initializer */ #define INIT_CPU_TIMERLISTS(c) { \ LIST_HEAD_INIT(c.cpu_timers[0]), \ @@ -94,6 +113,9 @@ static inline void posix_cputimers_init(struct posix_cputimers *pct) #else struct posix_cputimers { }; #define INIT_CPU_TIMERS(s) +static inline void posix_cputimers_init(struct posix_cputimers *pct) { } +static inline void posix_cputimers_group_init(struct posix_cputimers *pct, + u64 cpu_limit) { } #endif #define REQUEUE_PENDING 1 diff --git a/include/linux/sched.h b/include/linux/sched.h index 37c39df9b186..8cc8e323093f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,11 +246,6 @@ struct prev_cputime { #endif }; -/* Alternate field names when used on cache expirations: */ -#define virt_exp utime -#define prof_exp stime -#define sched_exp sum_exec_runtime - enum vtime_state { /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, @@ -862,9 +857,6 @@ struct task_struct { unsigned long min_flt; unsigned long maj_flt; -#ifdef CONFIG_POSIX_TIMERS - struct task_cputime cputime_expires; -#endif /* Empty if CONFIG_POSIX_CPUTIMERS=n */ struct posix_cputimers posix_cputimers; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 88fbb3f1c375..729bd892ee45 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -149,9 +149,6 @@ struct signal_struct { */ struct thread_group_cputimer cputimer; - /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; - #endif /* Empty if CONFIG_POSIX_TIMERS=n */ struct posix_cputimers posix_cputimers; -- cgit v1.2.3 From 11b8462f7e1d25f639c88949a2746a9c2667a766 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:07 +0200 Subject: posix-cpu-timers: Provide array based access to expiry cache Using struct task_cputime for the expiry cache is a pretty odd choice and comes with magic defines to rename the fields for usage in the expiry cache. struct task_cputime is basically a u64 array with 3 members, but it has distinct members. The expiry cache content is different than the content of task_cputime because expiry[PROF] = task_cputime.stime + task_cputime.utime expiry[VIRT] = task_cputime.utime expiry[SCHED] = task_cputime.sum_exec_runtime So there is no direct mapping between task_cputime and the expiry cache and the #define based remapping is just a horrible hack. Having the expiry cache array based allows further simplification of the expiry code. To avoid an all in one cleanup which is hard to review add a temporary anonymous union into struct task_cputime which allows array based access to it. That requires to reorder the members. Add a build time sanity check to validate that the members are at the same place. The union and the build time checks will be removed after conversion. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192921.105793824@linutronix.de --- include/linux/posix-timers.h | 24 ++++++++++++++++++------ include/linux/sched/types.h | 4 ++-- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a3731ba15bce..ed0f6ec30aa6 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -65,27 +65,39 @@ static inline int clockid_to_fd(const clockid_t clk) /* * Alternate field names for struct task_cputime when used on cache * expirations. Will go away soon. + * + * stime corresponds to CLOCKCPU_PROF + * utime corresponds to CLOCKCPU_VIRT + * sum_exex_runtime corresponds to CLOCKCPU_SCHED + * + * The ordering is currently enforced so struct task_cputime and the + * expiries array in struct posix_cputimers are equivalent. */ -#define virt_exp utime #define prof_exp stime +#define virt_exp utime #define sched_exp sum_exec_runtime #ifdef CONFIG_POSIX_TIMERS /** * posix_cputimers - Container for posix CPU timer related data - * @cputime_expires: Earliest-expiration cache + * @cputime_expires: Earliest-expiration cache task_cputime based + * @expiries: Earliest-expiration cache array based * @cpu_timers: List heads to queue posix CPU timers * * Used in task_struct and signal_struct */ struct posix_cputimers { - struct task_cputime cputime_expires; - struct list_head cpu_timers[CPUCLOCK_MAX]; + /* Temporary union until all users are cleaned up */ + union { + struct task_cputime cputime_expires; + u64 expiries[CPUCLOCK_MAX]; + }; + struct list_head cpu_timers[CPUCLOCK_MAX]; }; static inline void posix_cputimers_init(struct posix_cputimers *pct) { - memset(&pct->cputime_expires, 0, sizeof(pct->cputime_expires)); + memset(&pct->expiries, 0, sizeof(pct->expiries)); INIT_LIST_HEAD(&pct->cpu_timers[0]); INIT_LIST_HEAD(&pct->cpu_timers[1]); INIT_LIST_HEAD(&pct->cpu_timers[2]); @@ -96,7 +108,7 @@ void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit); static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, u64 runtime) { - pct->cputime_expires.sched_exp = runtime; + pct->expiries[CPUCLOCK_SCHED] = runtime; } /* Init task static initializer */ diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h index 2c5c28ddd9b2..3c3e049224ae 100644 --- a/include/linux/sched/types.h +++ b/include/linux/sched/types.h @@ -6,8 +6,8 @@ /** * struct task_cputime - collected CPU time counts - * @utime: time spent in user mode, in nanoseconds * @stime: time spent in kernel mode, in nanoseconds + * @utime: time spent in user mode, in nanoseconds * @sum_exec_runtime: total time spent on the CPU, in nanoseconds * * This structure groups together three kinds of CPU time that are tracked for @@ -15,8 +15,8 @@ * these counts together and treat all three of them in parallel. */ struct task_cputime { - u64 utime; u64 stime; + u64 utime; unsigned long long sum_exec_runtime; }; -- cgit v1.2.3 From bbc9bae1e49bee9862c7f24101a728e73cd9f589 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:11 +0200 Subject: posix-cpu-timers: Remove the odd field rename defines The last users of the odd define based renaming of struct task_cputime members are gone. Good riddance. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192921.499058279@linutronix.de --- include/linux/posix-timers.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index ed0f6ec30aa6..e36c6fda1af9 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -62,21 +62,6 @@ static inline int clockid_to_fd(const clockid_t clk) return ~(clk >> 3); } -/* - * Alternate field names for struct task_cputime when used on cache - * expirations. Will go away soon. - * - * stime corresponds to CLOCKCPU_PROF - * utime corresponds to CLOCKCPU_VIRT - * sum_exex_runtime corresponds to CLOCKCPU_SCHED - * - * The ordering is currently enforced so struct task_cputime and the - * expiries array in struct posix_cputimers are equivalent. - */ -#define prof_exp stime -#define virt_exp utime -#define sched_exp sum_exec_runtime - #ifdef CONFIG_POSIX_TIMERS /** * posix_cputimers - Container for posix CPU timer related data -- cgit v1.2.3 From 46b883995c88520f2c4de6a64cccc04c69d59f83 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:14 +0200 Subject: posix-cpu-timers: Remove cputime_expires The last users of the magic struct cputime based expiry cache are gone. Remove the leftovers. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192921.790209622@linutronix.de --- include/linux/posix-timers.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index e36c6fda1af9..fd9098467d6d 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -65,19 +65,14 @@ static inline int clockid_to_fd(const clockid_t clk) #ifdef CONFIG_POSIX_TIMERS /** * posix_cputimers - Container for posix CPU timer related data - * @cputime_expires: Earliest-expiration cache task_cputime based * @expiries: Earliest-expiration cache array based * @cpu_timers: List heads to queue posix CPU timers * * Used in task_struct and signal_struct */ struct posix_cputimers { - /* Temporary union until all users are cleaned up */ - union { - struct task_cputime cputime_expires; - u64 expiries[CPUCLOCK_MAX]; - }; - struct list_head cpu_timers[CPUCLOCK_MAX]; + u64 expiries[CPUCLOCK_MAX]; + struct list_head cpu_timers[CPUCLOCK_MAX]; }; static inline void posix_cputimers_init(struct posix_cputimers *pct) -- cgit v1.2.3 From 87dc64480fb19a6a0fedbdff1e2557be50673287 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Aug 2019 20:22:24 +0200 Subject: posix-cpu-timers: Restructure expiry array Now that the abused struct task_cputime is gone, it's more natural to bundle the expiry cache and the list head of each clock into a struct and have an array of those structs. Follow the hrtimer naming convention of 'bases' and rename the expiry cache to 'nextevt' and adapt all usage sites. Generates also better code .text size shrinks by 80 bytes. Suggested-by: Ingo Molnar Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908262021140.1939@nanos.tec.linutronix.de --- include/linux/posix-timers.h | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index fd9098467d6d..64bd10d251fe 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -63,24 +63,33 @@ static inline int clockid_to_fd(const clockid_t clk) } #ifdef CONFIG_POSIX_TIMERS + /** - * posix_cputimers - Container for posix CPU timer related data - * @expiries: Earliest-expiration cache array based + * posix_cputimer_base - Container per posix CPU clock + * @nextevt: Earliest-expiration cache * @cpu_timers: List heads to queue posix CPU timers + */ +struct posix_cputimer_base { + u64 nextevt; + struct list_head cpu_timers; +}; + +/** + * posix_cputimers - Container for posix CPU timer related data + * @bases: Base container for posix CPU clocks * * Used in task_struct and signal_struct */ struct posix_cputimers { - u64 expiries[CPUCLOCK_MAX]; - struct list_head cpu_timers[CPUCLOCK_MAX]; + struct posix_cputimer_base bases[CPUCLOCK_MAX]; }; static inline void posix_cputimers_init(struct posix_cputimers *pct) { - memset(&pct->expiries, 0, sizeof(pct->expiries)); - INIT_LIST_HEAD(&pct->cpu_timers[0]); - INIT_LIST_HEAD(&pct->cpu_timers[1]); - INIT_LIST_HEAD(&pct->cpu_timers[2]); + memset(pct->bases, 0, sizeof(pct->bases)); + INIT_LIST_HEAD(&pct->bases[0].cpu_timers); + INIT_LIST_HEAD(&pct->bases[1].cpu_timers); + INIT_LIST_HEAD(&pct->bases[2].cpu_timers); } void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit); @@ -88,19 +97,23 @@ void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit); static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, u64 runtime) { - pct->expiries[CPUCLOCK_SCHED] = runtime; + pct->bases[CPUCLOCK_SCHED].nextevt = runtime; } /* Init task static initializer */ -#define INIT_CPU_TIMERLISTS(c) { \ - LIST_HEAD_INIT(c.cpu_timers[0]), \ - LIST_HEAD_INIT(c.cpu_timers[1]), \ - LIST_HEAD_INIT(c.cpu_timers[2]), \ +#define INIT_CPU_TIMERBASE(b) { \ + .cpu_timers = LIST_HEAD_INIT(b.cpu_timers), \ +} + +#define INIT_CPU_TIMERBASES(b) { \ + INIT_CPU_TIMERBASE(b[0]), \ + INIT_CPU_TIMERBASE(b[1]), \ + INIT_CPU_TIMERBASE(b[2]), \ } #define INIT_CPU_TIMERS(s) \ .posix_cputimers = { \ - .cpu_timers = INIT_CPU_TIMERLISTS(s.posix_cputimers), \ + .bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases), \ }, #else struct posix_cputimers { }; -- cgit v1.2.3 From b7be4ef1365dcb56fdffc6689e41058b23f5996d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:16 +0200 Subject: posix-cpu-timers: Switch thread group sampling to array That allows more simplifications in various places. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192921.988426956@linutronix.de --- include/linux/sched/cputime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 2638fd0ab3f2..eefa5dff16b4 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -61,7 +61,7 @@ extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, * Thread group CPU time accounting. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_sample_cputime(struct task_struct *tsk, struct task_cputime *times); +void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples); /* * The following are functions that support scheduler-internal time accounting. -- cgit v1.2.3 From 2bbdbdae05167c688b6d3499a7dab74208b80a22 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:19 +0200 Subject: posix-cpu-timers: Get rid of zero checks Deactivation of the expiry cache is done by setting all clock caches to 0. That requires to have a check for zero in all places which update the expiry cache: if (cache == 0 || new < cache) cache = new; Use U64_MAX as the deactivated value, which allows to remove the zero checks when updating the cache and reduces it to the obvious check: if (new < cache) cache = new; This also removes the weird workaround in do_prlimit() which was required to convert a RLIMIT_CPU value of 0 (immediate expiry) to 1 because handing in 0 to the posix CPU timer code would have effectively disarmed it. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190821192922.275086128@linutronix.de --- include/linux/posix-timers.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 64bd10d251fe..3ea920e8fe7f 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -86,7 +86,9 @@ struct posix_cputimers { static inline void posix_cputimers_init(struct posix_cputimers *pct) { - memset(pct->bases, 0, sizeof(pct->bases)); + pct->bases[0].nextevt = U64_MAX; + pct->bases[1].nextevt = U64_MAX; + pct->bases[2].nextevt = U64_MAX; INIT_LIST_HEAD(&pct->bases[0].cpu_timers); INIT_LIST_HEAD(&pct->bases[1].cpu_timers); INIT_LIST_HEAD(&pct->bases[2].cpu_timers); @@ -102,7 +104,8 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ - .cpu_timers = LIST_HEAD_INIT(b.cpu_timers), \ + .nextevt = U64_MAX, \ + .cpu_timers = LIST_HEAD_INIT(b.cpu_timers), \ } #define INIT_CPU_TIMERBASES(b) { \ -- cgit v1.2.3 From 244d49e30653658d4e7e9b2b8427777cbbc5affe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 21:09:24 +0200 Subject: posix-cpu-timers: Move state tracking to struct posix_cputimers Put it where it belongs and clean up the ifdeffery in fork completely. Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190821192922.743229404@linutronix.de --- include/linux/posix-timers.h | 8 ++++++++ include/linux/sched/cputime.h | 9 ++++++--- include/linux/sched/signal.h | 6 ------ 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 3ea920e8fe7f..a9e3f69d2db4 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -77,15 +77,23 @@ struct posix_cputimer_base { /** * posix_cputimers - Container for posix CPU timer related data * @bases: Base container for posix CPU clocks + * @timers_active: Timers are queued. + * @expiry_active: Timer expiry is active. Used for + * process wide timers to avoid multiple + * task trying to handle expiry concurrently * * Used in task_struct and signal_struct */ struct posix_cputimers { struct posix_cputimer_base bases[CPUCLOCK_MAX]; + unsigned int timers_active; + unsigned int expiry_active; }; static inline void posix_cputimers_init(struct posix_cputimers *pct) { + pct->timers_active = 0; + pct->expiry_active = 0; pct->bases[0].nextevt = U64_MAX; pct->bases[1].nextevt = U64_MAX; pct->bases[2].nextevt = U64_MAX; diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index eefa5dff16b4..6c9f19a33865 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -70,7 +70,7 @@ void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples); */ /** - * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running + * get_running_cputimer - return &tsk->signal->cputimer if cputimers are active * * @tsk: Pointer to target task. */ @@ -80,8 +80,11 @@ struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - /* Check if cputimer isn't running. This is accessed without locking. */ - if (!READ_ONCE(cputimer->running)) + /* + * Check whether posix CPU timers are active. If not the thread + * group accounting is not active either. Lockless check. + */ + if (!READ_ONCE(tsk->signal->posix_cputimers.timers_active)) return NULL; /* diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 729bd892ee45..88050259c466 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -57,18 +57,12 @@ struct task_cputime_atomic { /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. - * @running: true when there are timers running and - * @cputime_atomic receives updates. - * @checking_timer: true when a thread in the group is in the - * process of checking for thread group timers. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. */ struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; - bool running; - bool checking_timer; }; struct multiprocess_signals { -- cgit v1.2.3 From 60bda037f1dd8151e0c9ee5b09f0c091a0f643cd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 27 Aug 2019 21:31:02 +0200 Subject: posix-cpu-timers: Utilize timerqueue for storage Using a linear O(N) search for timer insertion affects execution time and D-cache footprint badly with a larger number of timers. Switch the storage to a timerqueue which is already used for hrtimers and alarmtimers. It does not affect the size of struct k_itimer as it.alarm is still larger. The extra list head for the expiry list will go away later once the expiry is moved into task work context. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908272129220.1939@nanos.tec.linutronix.de --- include/linux/posix-timers.h | 65 +++++++++++++++++++++++++++++++++----------- include/linux/timerqueue.h | 10 +++++++ 2 files changed, 59 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a9e3f69d2db4..f9fbb4c620c1 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -5,17 +5,11 @@ #include #include #include +#include struct kernel_siginfo; struct task_struct; -struct cpu_timer_list { - struct list_head entry; - u64 expires; - struct task_struct *task; - int firing; -}; - /* * Bit fields within a clockid: * @@ -64,14 +58,58 @@ static inline int clockid_to_fd(const clockid_t clk) #ifdef CONFIG_POSIX_TIMERS +/** + * cpu_timer - Posix CPU timer representation for k_itimer + * @node: timerqueue node to queue in the task/sig + * @head: timerqueue head on which this timer is queued + * @task: Pointer to target task + * @elist: List head for the expiry list + * @firing: Timer is currently firing + */ +struct cpu_timer { + struct timerqueue_node node; + struct timerqueue_head *head; + struct task_struct *task; + struct list_head elist; + int firing; +}; + +static inline bool cpu_timer_requeue(struct cpu_timer *ctmr) +{ + return timerqueue_add(ctmr->head, &ctmr->node); +} + +static inline bool cpu_timer_enqueue(struct timerqueue_head *head, + struct cpu_timer *ctmr) +{ + ctmr->head = head; + return timerqueue_add(head, &ctmr->node); +} + +static inline void cpu_timer_dequeue(struct cpu_timer *ctmr) +{ + if (!RB_EMPTY_NODE(&ctmr->node.node)) + timerqueue_del(ctmr->head, &ctmr->node); +} + +static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr) +{ + return ctmr->node.expires; +} + +static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp) +{ + ctmr->node.expires = exp; +} + /** * posix_cputimer_base - Container per posix CPU clock * @nextevt: Earliest-expiration cache - * @cpu_timers: List heads to queue posix CPU timers + * @tqhead: timerqueue head for cpu_timers */ struct posix_cputimer_base { u64 nextevt; - struct list_head cpu_timers; + struct timerqueue_head tqhead; }; /** @@ -92,14 +130,10 @@ struct posix_cputimers { static inline void posix_cputimers_init(struct posix_cputimers *pct) { - pct->timers_active = 0; - pct->expiry_active = 0; + memset(pct, 0, sizeof(*pct)); pct->bases[0].nextevt = U64_MAX; pct->bases[1].nextevt = U64_MAX; pct->bases[2].nextevt = U64_MAX; - INIT_LIST_HEAD(&pct->bases[0].cpu_timers); - INIT_LIST_HEAD(&pct->bases[1].cpu_timers); - INIT_LIST_HEAD(&pct->bases[2].cpu_timers); } void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit); @@ -113,7 +147,6 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ .nextevt = U64_MAX, \ - .cpu_timers = LIST_HEAD_INIT(b.cpu_timers), \ } #define INIT_CPU_TIMERBASES(b) { \ @@ -182,7 +215,7 @@ struct k_itimer { struct { struct hrtimer timer; } real; - struct cpu_timer_list cpu; + struct cpu_timer cpu; struct { struct alarm alarmtimer; } alarm; diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index aff122f1062a..93884086f392 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -43,6 +43,16 @@ static inline void timerqueue_init(struct timerqueue_node *node) RB_CLEAR_NODE(&node->node); } +static inline bool timerqueue_node_queued(struct timerqueue_node *node) +{ + return !RB_EMPTY_NODE(&node->node); +} + +static inline bool timerqueue_node_expires(struct timerqueue_node *node) +{ + return node->expires; +} + static inline void timerqueue_init_head(struct timerqueue_head *head) { head->rb_root = RB_ROOT_CACHED; -- cgit v1.2.3 From a67e408241783575716fcf3f79d0878f6cef0273 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 23 Aug 2019 13:38:44 +0200 Subject: hrtimer: Add kernel doc annotation for HRTIMER_MODE_HARD Add kernel doc annotation for HRTIMER_MODE_HARD. Fixes: ae6683d815895 ("hrtimer: Introduce HARD expiry mode") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190823113845.12125-2-bigeasy@linutronix.de --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5df4bcff96d5..1b9a51a1bccb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -32,6 +32,8 @@ struct hrtimer_cpu_base; * when starting the timer) * HRTIMER_MODE_SOFT - Timer callback function will be executed in * soft irq context + * HRTIMER_MODE_HARD - Timer callback function will be executed in + * hard irq context even on PREEMPT_RT. */ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, -- cgit v1.2.3 From 8f2edb4a78f7f5fa35c025849152b1d2dfaee4eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Aug 2019 08:19:40 +0200 Subject: posix-timers: Unbreak CONFIG_POSIX_TIMERS=n build The rework of the posix-cpu-timers patch series dropped the empty declaration of struct cpu_timer for the CONFIG_POSIX_TIMERS=n case which causes the build to fail: ./include/linux/posix-timers.h:218:20: error: field 'cpu' has incomplete type Add it back. Fixes: 60bda037f1dd ("posix-cpu-timers: Utilize timerqueue for storage") Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index f9fbb4c620c1..e685916551bf 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -161,6 +161,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, }, #else struct posix_cputimers { }; +struct cpu_timer { }; #define INIT_CPU_TIMERS(s) static inline void posix_cputimers_init(struct posix_cputimers *pct) { } static inline void posix_cputimers_group_init(struct posix_cputimers *pct, -- cgit v1.2.3 From 00d9e47f8ec2a293db9ebed86aab0583d9a49533 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 5 Sep 2019 14:03:40 +0200 Subject: posix-cpu-timers: Always clear head pointer on dequeue The head pointer in struct cpu_timer is checked to be NULL in posix_cpu_timer_del() when the delete raced with the exit cleanup. The works correctly as long as the timer is actually dequeued via posix_cpu_timers_exit*(). But if the timer was dequeued due to expiry the head pointer is still set and triggers the warning. In fact keeping the head pointer around after any dequeue is pointless as is has no meaning at all after that. Clear the head pointer always on dequeue and remove the unused requeue function while at it. Fixes: 60bda037f1dd ("posix-cpu-timers: Utilize timerqueue for storage") Reported-by: syzbot+55acd54b57bb4b3840a4@syzkaller.appspotmail.com Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20190905120539.707986830@linutronix.de --- include/linux/posix-timers.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index e685916551bf..3d10c84a97a9 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -74,11 +74,6 @@ struct cpu_timer { int firing; }; -static inline bool cpu_timer_requeue(struct cpu_timer *ctmr) -{ - return timerqueue_add(ctmr->head, &ctmr->node); -} - static inline bool cpu_timer_enqueue(struct timerqueue_head *head, struct cpu_timer *ctmr) { @@ -88,8 +83,10 @@ static inline bool cpu_timer_enqueue(struct timerqueue_head *head, static inline void cpu_timer_dequeue(struct cpu_timer *ctmr) { - if (!RB_EMPTY_NODE(&ctmr->node.node)) + if (ctmr->head) { timerqueue_del(ctmr->head, &ctmr->node); + ctmr->head = NULL; + } } static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr) -- cgit v1.2.3