Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/thread_info_tif.h       |  5
-rw-r--r--  include/linux/clockchips.h                   | 10
-rw-r--r--  include/linux/clocksource.h                  | 27
-rw-r--r--  include/linux/hrtimer.h                      | 64
-rw-r--r--  include/linux/hrtimer_defs.h                 | 83
-rw-r--r--  include/linux/hrtimer_rearm.h                | 83
-rw-r--r--  include/linux/hrtimer_types.h                | 19
-rw-r--r--  include/linux/irq-entry-common.h             | 25
-rw-r--r--  include/linux/jiffies.h                      |  6
-rw-r--r--  include/linux/rbtree.h                       | 81
-rw-r--r--  include/linux/rbtree_types.h                 | 16
-rw-r--r--  include/linux/rseq_entry.h                   | 16
-rw-r--r--  include/linux/timekeeper_internal.h          |  8
-rw-r--r--  include/linux/timerqueue.h                   | 56
-rw-r--r--  include/linux/timerqueue_types.h             | 15
-rw-r--r--  include/linux/trace_events.h                 | 13
-rw-r--r--  include/trace/events/timer.h                 | 42
-rw-r--r--  include/trace/stages/stage3_trace_output.h   | 40
18 files changed, 427 insertions(+), 182 deletions(-)
diff --git a/include/asm-generic/thread_info_tif.h b/include/asm-generic/thread_info_tif.h
index da1610a78f92..528e6fc7efe9 100644
--- a/include/asm-generic/thread_info_tif.h
+++ b/include/asm-generic/thread_info_tif.h
@@ -41,11 +41,14 @@
 #define _TIF_PATCH_PENDING	BIT(TIF_PATCH_PENDING)
 
 #ifdef HAVE_TIF_RESTORE_SIGMASK
-# define TIF_RESTORE_SIGMASK	10	// Restore signal mask in do_signal() */
+# define TIF_RESTORE_SIGMASK	10	// Restore signal mask in do_signal()
 # define _TIF_RESTORE_SIGMASK	BIT(TIF_RESTORE_SIGMASK)
 #endif
 
 #define TIF_RSEQ		11	// Run RSEQ fast path
 #define _TIF_RSEQ		BIT(TIF_RSEQ)
 
+#define TIF_HRTIMER_REARM	12	// re-arm the timer
+#define _TIF_HRTIMER_REARM	BIT(TIF_HRTIMER_REARM)
+
 #endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 50cdc9da8d32..6adb72761246 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -43,9 +43,9 @@ enum clock_event_state {
 /*
  * Clock event features
  */
-# define CLOCK_EVT_FEAT_PERIODIC	0x000001
-# define CLOCK_EVT_FEAT_ONESHOT		0x000002
-# define CLOCK_EVT_FEAT_KTIME		0x000004
+# define CLOCK_EVT_FEAT_PERIODIC		0x000001
+# define CLOCK_EVT_FEAT_ONESHOT			0x000002
+# define CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED	0x000004
 
 /*
  * x86(64) specific (mis)features:
@@ -73,6 +73,7 @@ enum clock_event_state {
  *			level handler of the event source
  * @set_next_event:	set next event function using a clocksource delta
  * @set_next_ktime:	set next event function using a direct ktime value
+ * @set_next_coupled:	set next event function for clocksource coupled mode
  * @next_event:		local storage for the next event in oneshot mode
  * @max_delta_ns:	maximum delta value in ns
  * @min_delta_ns:	minimum delta value in ns
@@ -80,6 +81,7 @@ enum clock_event_state {
  * @shift:		nanoseconds to cycles divisor (power of two)
  * @state_use_accessors:current state of the device, assigned by the core code
  * @features:		features
+ * @cs_id:		Clocksource ID to denote the clocksource for coupled mode
  * @next_event_forced:	True if the last programming was a forced event
  * @retries:		number of forced programming retries
  * @set_state_periodic:	switch state to periodic
@@ -102,6 +104,7 @@ struct clock_event_device {
 	void			(*event_handler)(struct clock_event_device *);
 	int			(*set_next_event)(unsigned long evt, struct clock_event_device *);
 	int			(*set_next_ktime)(ktime_t expires, struct clock_event_device *);
+	void			(*set_next_coupled)(u64 cycles, struct clock_event_device *);
 	ktime_t			next_event;
 	u64			max_delta_ns;
 	u64			min_delta_ns;
@@ -109,6 +112,7 @@ struct clock_event_device {
 	u32			shift;
 	enum clock_event_state	state_use_accessors;
 	unsigned int		features;
+	enum clocksource_ids	cs_id;
 	unsigned int		next_event_forced;
 	unsigned long		retries;
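
For illustration, a driver advertising the new coupled mode might look roughly like
the following sketch. The device name and the register helper are invented; only the
CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED flag, the ->set_next_coupled() callback and the
->cs_id field come from this patch:

/* Invented hardware accessor, stands in for real register programming */
static void foo_write_comparator(u64 cycles) { }

static void foo_set_next_coupled(u64 cycles, struct clock_event_device *evt)
{
	/* Program the comparator with an absolute clocksource cycle value */
	foo_write_comparator(cycles);
}

static struct clock_event_device foo_clockevent = {
	.name			= "foo",
	.features		= CLOCK_EVT_FEAT_ONESHOT |
				  CLOCK_EVT_FEAT_CLOCKSOURCE_COUPLED,
	.set_next_coupled	= foo_set_next_coupled,
	.cs_id			= CSID_GENERIC,	/* must match the coupled clocksource */
};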
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 65b7c41471c3..ccf5c0ca26b7 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -44,8 +44,6 @@ struct module;
  * @shift:		Cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	Maximum idle time permitted by the clocksource (nsecs)
  * @maxadj:		Maximum adjustment value to mult (~11%)
- * @uncertainty_margin:	Maximum uncertainty in nanoseconds per half second.
- *			Zero says to use default WATCHDOG_THRESHOLD.
  * @archdata:		Optional arch-specific data
  * @max_cycles:		Maximum safe cycle value which won't overflow on
  *			multiplication
@@ -105,7 +103,6 @@ struct clocksource {
 	u32			shift;
 	u64			max_idle_ns;
 	u32			maxadj;
-	u32			uncertainty_margin;
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
@@ -133,6 +130,7 @@ struct clocksource {
 	struct list_head	wd_list;
 	u64			cs_last;
 	u64			wd_last;
+	unsigned int		wd_cpu;
 #endif
 	struct module		*owner;
 };
@@ -142,13 +140,19 @@ struct clocksource {
  */
 #define CLOCK_SOURCE_IS_CONTINUOUS		0x01
 #define CLOCK_SOURCE_MUST_VERIFY		0x02
+#define CLOCK_SOURCE_CALIBRATED			0x04
 
 #define CLOCK_SOURCE_WATCHDOG			0x10
 #define CLOCK_SOURCE_VALID_FOR_HRES		0x20
 #define CLOCK_SOURCE_UNSTABLE			0x40
 #define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
 #define CLOCK_SOURCE_RESELECT			0x100
-#define CLOCK_SOURCE_VERIFY_PERCPU		0x200
+#define CLOCK_SOURCE_CAN_INLINE_READ		0x200
+#define CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT	0x400
+
+#define CLOCK_SOURCE_WDTEST			0x800
+#define CLOCK_SOURCE_WDTEST_PERCPU		0x1000
+
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
@@ -298,21 +302,6 @@ static inline void timer_probe(void) {}
 #define TIMER_ACPI_DECLARE(name, table_id, fn)		\
 	ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
 
-static inline unsigned int clocksource_get_max_watchdog_retry(void)
-{
-	/*
-	 * When system is in the boot phase or under heavy workload, there
-	 * can be random big latencies during the clocksource/watchdog
-	 * read, so allow retries to filter the noise latency. As the
-	 * latency's frequency and maximum value goes up with the number of
-	 * CPUs, scale the number of retries with the number of online
-	 * CPUs.
-	 */
-	return (ilog2(num_online_cpus()) / 2) + 1;
-}
-
-void clocksource_verify_percpu(struct clocksource *cs);
-
 /**
  * struct clocksource_base - hardware abstraction for clock on which a clocksource
  *			is based
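
As a reminder, CLOCKSOURCE_MASK(32) expands to GENMASK_ULL(31, 0), i.e.
0x00000000ffffffff, the usual mask for a 32-bit counter. A sketch of testing the new
coupled-event capability follows; the helper name is invented for illustration:

static inline bool cs_has_coupled_event(const struct clocksource *cs)
{
	/* Invented helper: tests the flag introduced by this patch */
	return cs->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT;
}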
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 74adbd4e7003..9ced498fefaa 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -13,6 +13,7 @@
 #define _LINUX_HRTIMER_H
 
 #include <linux/hrtimer_defs.h>
+#include <linux/hrtimer_rearm.h>
 #include <linux/hrtimer_types.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -31,6 +32,13 @@
  *				soft irq context
  * HRTIMER_MODE_HARD		- Timer callback function will be executed in
  *				hard irq context even on PREEMPT_RT.
+ * HRTIMER_MODE_LAZY_REARM	- Avoid reprogramming if the timer was the
+ *				first expiring timer and is moved into the
+ *				future. Special mode for the HRTICK timer to
+ *				avoid extensive reprogramming of the hardware,
+ *				which is expensive in virtual machines. Risks
+ *				a pointless expiry, but that's better than
+ *				reprogramming on every context switch.
  */
 enum hrtimer_mode {
 	HRTIMER_MODE_ABS	= 0x00,
@@ -38,6 +46,7 @@ enum hrtimer_mode {
 	HRTIMER_MODE_PINNED	= 0x02,
 	HRTIMER_MODE_SOFT	= 0x04,
 	HRTIMER_MODE_HARD	= 0x08,
+	HRTIMER_MODE_LAZY_REARM	= 0x10,
 
 	HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
 	HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -55,33 +64,6 @@ enum hrtimer_mode {
 	HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
 };
 
-/*
- * Values to track state of the timer
- *
- * Possible states:
- *
- * 0x00		inactive
- * 0x01		enqueued into rbtree
- *
- * The callback state is not part of the timer->state because clearing it would
- * mean touching the timer after the callback, this makes it impossible to free
- * the timer from the callback function.
- *
- * Therefore we track the callback state in:
- *
- *	timer->base->cpu_base->running == timer
- *
- * On SMP it is possible to have a "callback function running and enqueued"
- * status. It happens for example when a posix timer expired and the callback
- * queued a signal. Between dropping the lock which protects the posix timer
- * and reacquiring the base lock of the hrtimer, another CPU can deliver the
- * signal and rearm the timer.
- *
- * All state transitions are protected by cpu_base->lock.
- */
-#define HRTIMER_STATE_INACTIVE	0x00
-#define HRTIMER_STATE_ENQUEUED	0x01
-
 /**
  * struct hrtimer_sleeper - simple sleeper structure
  * @timer:	embedded timer structure
@@ -134,11 +116,6 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
 	return timer->_softexpires;
 }
 
-static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
-{
-	return ktime_to_ns(timer->node.expires);
-}
-
 ktime_t hrtimer_cb_get_time(const struct hrtimer *timer);
 
 static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
@@ -146,24 +123,23 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
 	return ktime_sub(timer->node.expires, hrtimer_cb_get_time(timer));
 }
 
-static inline int hrtimer_is_hres_active(struct hrtimer *timer)
-{
-	return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
-		timer->base->cpu_base->hres_active : 0;
-}
-
 #ifdef CONFIG_HIGH_RES_TIMERS
+extern unsigned int hrtimer_resolution;
 struct clock_event_device;
 
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
-extern unsigned int hrtimer_resolution;
+extern struct static_key_false hrtimer_highres_enabled_key;
 
-#else
+static inline bool hrtimer_highres_enabled(void)
+{
+	return static_branch_likely(&hrtimer_highres_enabled_key);
+}
+#else /* CONFIG_HIGH_RES_TIMERS */
 
 #define hrtimer_resolution	(unsigned int)LOW_RES_NSEC
 
-#endif
+static inline bool hrtimer_highres_enabled(void) { return false; }
+#endif /* !CONFIG_HIGH_RES_TIMERS */
 
 static inline ktime_t
 __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
@@ -293,8 +269,8 @@ extern bool hrtimer_active(const struct hrtimer *timer);
  */
 static inline bool hrtimer_is_queued(struct hrtimer *timer)
 {
-	/* The READ_ONCE pairs with the update functions of timer->state */
-	return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED);
+	/* The READ_ONCE pairs with the update functions of timer->is_queued */
+	return READ_ONCE(timer->is_queued);
 }
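
A minimal sketch of arming a timer with the new lazy mode, using the ordinary
hrtimer_setup()/hrtimer_start() API. The callback and the one-millisecond expiry are
made up for illustration; in this series the mode is intended for the scheduler's
HRTICK timer:

static enum hrtimer_restart demo_expiry(struct hrtimer *t)
{
	return HRTIMER_NORESTART;
}

static void demo_arm_lazy(struct hrtimer *t)
{
	hrtimer_setup(t, demo_expiry, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	/* Tolerate a stale hardware programming instead of reprogramming */
	hrtimer_start(t, ns_to_ktime(NSEC_PER_MSEC),
		      HRTIMER_MODE_REL_HARD | HRTIMER_MODE_LAZY_REARM);
}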
diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 02b010df6570..52ed9e46ff13 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -19,21 +19,23 @@
  *			timer to a base on another cpu.
  * @clockid:		clock id for per_cpu support
  * @seq:		seqcount around __run_hrtimer
+ * @expires_next:	Absolute time of the next event in this clock base
  * @running:		pointer to the currently running hrtimer
  * @active:		red black tree root node for the active timers
  * @offset:		offset of this clock to the monotonic base
  */
 struct hrtimer_clock_base {
-	struct hrtimer_cpu_base	*cpu_base;
-	unsigned int		index;
-	clockid_t		clockid;
-	seqcount_raw_spinlock_t	seq;
-	struct hrtimer		*running;
-	struct timerqueue_head	active;
-	ktime_t			offset;
+	struct hrtimer_cpu_base		*cpu_base;
+	const unsigned int		index;
+	const clockid_t			clockid;
+	seqcount_raw_spinlock_t		seq;
+	ktime_t				expires_next;
+	struct hrtimer			*running;
+	struct timerqueue_linked_head	active;
+	ktime_t				offset;
 } __hrtimer_clock_base_align;
 
-enum hrtimer_base_type {
+enum hrtimer_base_type {
 	HRTIMER_BASE_MONOTONIC,
 	HRTIMER_BASE_REALTIME,
 	HRTIMER_BASE_BOOTTIME,
@@ -42,37 +44,36 @@ enum hrtimer_base_type {
 	HRTIMER_BASE_REALTIME_SOFT,
 	HRTIMER_BASE_BOOTTIME_SOFT,
 	HRTIMER_BASE_TAI_SOFT,
-	HRTIMER_MAX_CLOCK_BASES,
+	HRTIMER_MAX_CLOCK_BASES
 };
 
 /**
  * struct hrtimer_cpu_base - the per cpu clock bases
- * @lock:		lock protecting the base and associated clock bases
- *			and timers
- * @cpu:		cpu number
- * @active_bases:	Bitfield to mark bases with active timers
- * @clock_was_set_seq:	Sequence counter of clock was set events
- * @hres_active:	State of high resolution mode
- * @in_hrtirq:		hrtimer_interrupt() is currently executing
- * @hang_detected:	The last hrtimer interrupt detected a hang
- * @softirq_activated:	displays, if the softirq is raised - update of softirq
- *			related settings is not required then.
- * @nr_events:		Total number of hrtimer interrupt events
- * @nr_retries:		Total number of hrtimer interrupt retries
- * @nr_hangs:		Total number of hrtimer interrupt hangs
- * @max_hang_time:	Maximum time spent in hrtimer_interrupt
- * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
- *			expired
- * @online:		CPU is online from an hrtimers point of view
- * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer
- *			callback to finish.
- * @expires_next:	absolute time of the next event, is required for remote
- *			hrtimer enqueue; it is the total first expiry time (hard
- *			and soft hrtimer are taken into account)
- * @next_timer:		Pointer to the first expiring timer
- * @softirq_expires_next: Time to check, if soft queues needs also to be expired
- * @softirq_next_timer: Pointer to the first expiring softirq based timer
- * @clock_base:		array of clock bases for this cpu
+ * @lock:		lock protecting the base and associated clock bases and timers
+ * @cpu:		cpu number
+ * @active_bases:	Bitfield to mark bases with active timers
+ * @clock_was_set_seq:	Sequence counter of clock was set events
+ * @hres_active:	State of high resolution mode
+ * @deferred_rearm:	A deferred rearm is pending
+ * @deferred_needs_update: The deferred rearm must re-evaluate the first timer
+ * @hang_detected:	The last hrtimer interrupt detected a hang
+ * @softirq_activated:	displays, if the softirq is raised - update of softirq
+ *			related settings is not required then.
+ * @nr_events:		Total number of hrtimer interrupt events
+ * @nr_retries:		Total number of hrtimer interrupt retries
+ * @nr_hangs:		Total number of hrtimer interrupt hangs
+ * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are expired
+ * @online:		CPU is online from an hrtimers point of view
+ * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer callback to finish.
+ * @expires_next:	Absolute time of the next event, is required for remote
+ *			hrtimer enqueue; it is the total first expiry time (hard
+ *			and soft hrtimer are taken into account)
+ * @next_timer:		Pointer to the first expiring timer
+ * @softirq_expires_next: Time to check, if soft queues needs also to be expired
+ * @softirq_next_timer:	Pointer to the first expiring softirq based timer
+ * @deferred_expires_next: Cached expires next value for deferred rearm
+ * @clock_base:		Array of clock bases for this cpu
  *
  * Note: next_timer is just an optimization for __remove_hrtimer().
  * Do not dereference the pointer because it is not reliable on
@@ -83,11 +84,12 @@ struct hrtimer_cpu_base {
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
-	unsigned int			hres_active	: 1,
-					in_hrtirq	: 1,
-					hang_detected	: 1,
-					softirq_activated : 1,
-					online		: 1;
+	bool				hres_active;
+	bool				deferred_rearm;
+	bool				deferred_needs_update;
+	bool				hang_detected;
+	bool				softirq_activated;
+	bool				online;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	unsigned int			nr_events;
 	unsigned short			nr_retries;
@@ -102,6 +104,7 @@ struct hrtimer_cpu_base {
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
 	struct hrtimer			*softirq_next_timer;
+	ktime_t				deferred_expires_next;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 	call_single_data_t		csd;
 } ____cacheline_aligned;
diff --git a/include/linux/hrtimer_rearm.h b/include/linux/hrtimer_rearm.h
new file mode 100644
index 000000000000..a6f2e5d5e1c7
--- /dev/null
+++ b/include/linux/hrtimer_rearm.h
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _LINUX_HRTIMER_REARM_H
+#define _LINUX_HRTIMER_REARM_H
+
+#ifdef CONFIG_HRTIMER_REARM_DEFERRED
+#include <linux/thread_info.h>
+
+void __hrtimer_rearm_deferred(void);
+
+/*
+ * This is purely CPU local, so check the TIF bit first to avoid the overhead of
+ * the atomic test_and_clear_bit() operation for the common case where the bit
+ * is not set.
+ */
+static __always_inline bool hrtimer_test_and_clear_rearm_deferred_tif(unsigned long tif_work)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (unlikely(tif_work & _TIF_HRTIMER_REARM)) {
+		clear_thread_flag(TIF_HRTIMER_REARM);
+		return true;
+	}
+	return false;
+}
+
+#define TIF_REARM_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_HRTIMER_REARM)
+
+/* Invoked from the exit to user before invoking exit_to_user_mode_loop() */
+static __always_inline bool
+hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask)
+{
+	/* Help the compiler to optimize the function out for syscall returns */
+	if (!(tif_mask & _TIF_HRTIMER_REARM))
+		return false;
+	/*
+	 * Rearm the timer if none of the resched flags is set before going into
+	 * the loop which re-enables interrupts.
+	 */
+	if (unlikely((*tif_work & TIF_REARM_MASK) == _TIF_HRTIMER_REARM)) {
+		clear_thread_flag(TIF_HRTIMER_REARM);
+		__hrtimer_rearm_deferred();
+		/* Don't go into the loop if HRTIMER_REARM was the only flag */
+		*tif_work &= ~_TIF_HRTIMER_REARM;
+		return !*tif_work;
+	}
+	return false;
+}
+
+/* Invoked from the time slice extension decision function */
+static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work)
+{
+	if (hrtimer_test_and_clear_rearm_deferred_tif(tif_work))
+		__hrtimer_rearm_deferred();
+}
+
+/*
+ * This is to be called on all irqentry_exit() paths that will enable
+ * interrupts.
+ */
+static __always_inline void hrtimer_rearm_deferred(void)
+{
+	hrtimer_rearm_deferred_tif(read_thread_flags());
+}
+
+/*
+ * Invoked from the scheduler on entry to __schedule() so it can defer
+ * rearming after the load balancing callbacks which might change hrtick.
+ */
+static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void)
+{
+	return hrtimer_test_and_clear_rearm_deferred_tif(read_thread_flags());
+}
+
+#else /* CONFIG_HRTIMER_REARM_DEFERRED */
+static __always_inline void __hrtimer_rearm_deferred(void) { }
+static __always_inline void hrtimer_rearm_deferred(void) { }
+static __always_inline void hrtimer_rearm_deferred_tif(unsigned long tif_work) { }
+static __always_inline bool
+hrtimer_rearm_deferred_user_irq(unsigned long *tif_work, const unsigned long tif_mask) { return false; }
+static __always_inline bool hrtimer_test_and_clear_rearm_deferred(void) { return false; }
+#endif /* !CONFIG_HRTIMER_REARM_DEFERRED */
+
+#endif
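
To make the intended call site concrete, here is a simplified sketch (not the actual
generic entry code; the function name is invented) of where the unconditional helper
sits on an interrupt-exit path:

/* Sketch: an irqentry exit path that is about to re-enable interrupts */
static void example_irqentry_exit(void)
{
	/* Interrupts are still disabled at this point */
	hrtimer_rearm_deferred();	/* reprogram now if a rearm was deferred */
	/* ... proceed with the normal exit work, then re-enable interrupts */
}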
diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h
index 8fbbb6bdf7a1..b5dacc8271a4 100644
--- a/include/linux/hrtimer_types.h
+++ b/include/linux/hrtimer_types.h
@@ -17,7 +17,7 @@ enum hrtimer_restart {
 
 /**
  * struct hrtimer - the basic hrtimer structure
- * @node:	timerqueue node, which also manages node.expires,
+ * @node:	Linked timerqueue node, which also manages node.expires,
  *		the absolute expiry time in the hrtimers internal
  *		representation. The time is related to the clock on
  *		which the timer is based. Is setup by adding
@@ -28,23 +28,26 @@ enum hrtimer_restart {
  *		was armed.
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
- * @state:	state information (See bit values above)
+ * @is_queued:	Indicates whether a timer is enqueued or not
  * @is_rel:	Set if the timer was armed relative
  * @is_soft:	Set if hrtimer will be expired in soft interrupt context.
  * @is_hard:	Set if hrtimer will be expired in hard interrupt context
  *		even on RT.
+ * @is_lazy:	Set if the timer is frequently rearmed to avoid updates
+ *		of the clock event device
  *
  * The hrtimer structure must be initialized by hrtimer_setup()
  */
 struct hrtimer {
-	struct timerqueue_node		node;
+	struct timerqueue_linked_node	node;
+	struct hrtimer_clock_base	*base;
+	bool				is_queued;
+	bool				is_rel;
+	bool				is_soft;
+	bool				is_hard;
+	bool				is_lazy;
 	ktime_t				_softexpires;
 	enum hrtimer_restart		(*__private function)(struct hrtimer *);
-	struct hrtimer_clock_base	*base;
-	u8				state;
-	u8				is_rel;
-	u8				is_soft;
-	u8				is_hard;
 };
 
 #endif /* _LINUX_HRTIMER_TYPES_H */
diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index d26d1b1bcbfb..b976946b3cdb 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -3,6 +3,7 @@
 #define __LINUX_IRQENTRYCOMMON_H
 
 #include <linux/context_tracking.h>
+#include <linux/hrtimer_rearm.h>
 #include <linux/kmsan.h>
 #include <linux/rseq_entry.h>
 #include <linux/static_call_types.h>
@@ -33,6 +34,14 @@
 	 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ |		\
 	 ARCH_EXIT_TO_USER_MODE_WORK)
 
+#ifdef CONFIG_HRTIMER_REARM_DEFERRED
+# define EXIT_TO_USER_MODE_WORK_SYSCALL	(EXIT_TO_USER_MODE_WORK)
+# define EXIT_TO_USER_MODE_WORK_IRQ	(EXIT_TO_USER_MODE_WORK | _TIF_HRTIMER_REARM)
+#else
+# define EXIT_TO_USER_MODE_WORK_SYSCALL	(EXIT_TO_USER_MODE_WORK)
+# define EXIT_TO_USER_MODE_WORK_IRQ	(EXIT_TO_USER_MODE_WORK)
+#endif
+
 /**
  * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
  * @regs:	Pointer to currents pt_regs
@@ -203,6 +212,7 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work
 /**
  * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
  * @regs:	Pointer to pt_regs on entry stack
+ * @work_mask:	Which TIF bits need to be evaluated
  *
  * 1) check that interrupts are disabled
  * 2) call tick_nohz_user_enter_prepare()
@@ -212,7 +222,8 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work
  *
  * Don't invoke directly, use the syscall/irqentry_ prefixed variants below
  */
-static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
+static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs,
+							 const unsigned long work_mask)
 {
 	unsigned long ti_work;
 
@@ -222,8 +233,10 @@ static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
 	tick_nohz_user_enter_prepare();
 
 	ti_work = read_thread_flags();
-	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
-		ti_work = exit_to_user_mode_loop(regs, ti_work);
+	if (unlikely(ti_work & work_mask)) {
+		if (!hrtimer_rearm_deferred_user_irq(&ti_work, work_mask))
+			ti_work = exit_to_user_mode_loop(regs, ti_work);
+	}
 
 	arch_exit_to_user_mode_prepare(regs, ti_work);
 }
@@ -239,7 +252,7 @@ static __always_inline void __exit_to_user_mode_validate(void)
 /* Temporary workaround to keep ARM64 alive */
 static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs)
 {
-	__exit_to_user_mode_prepare(regs);
+	__exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK);
 	rseq_exit_to_user_mode_legacy();
 	__exit_to_user_mode_validate();
 }
@@ -253,7 +266,7 @@ static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs)
  */
 static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
 {
-	__exit_to_user_mode_prepare(regs);
+	__exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_SYSCALL);
 	rseq_syscall_exit_to_user_mode();
 	__exit_to_user_mode_validate();
 }
@@ -267,7 +280,7 @@ static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
  */
 static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)
 {
-	__exit_to_user_mode_prepare(regs);
+	__exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK_IRQ);
 	rseq_irqentry_exit_to_user_mode();
 	__exit_to_user_mode_validate();
 }
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index d1c3d4941854..bbd57061802c 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -67,10 +67,6 @@ extern void register_refined_jiffies(long clock_tick_rate);
 /* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
-#ifndef __jiffy_arch_data
-#define __jiffy_arch_data
-#endif
-
 /*
  * The 64-bit value is not atomic on 32-bit systems - you MUST NOT read it
  * without sampling the sequence number in jiffies_lock.
@@ -83,7 +79,7 @@ extern void register_refined_jiffies(long clock_tick_rate);
  * See arch/ARCH/kernel/vmlinux.lds.S
  */
 extern u64 __cacheline_aligned_in_smp jiffies_64;
-extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
+extern unsigned long volatile __cacheline_aligned_in_smp jiffies;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 4091e978aef2..48acdc3889dd 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -35,10 +35,15 @@
 #define RB_CLEAR_NODE(node)  \
 	((node)->__rb_parent_color = (unsigned long)(node))
 
+#define RB_EMPTY_LINKED_NODE(lnode)	RB_EMPTY_NODE(&(lnode)->node)
+#define RB_CLEAR_LINKED_NODE(lnode)	({			\
+	RB_CLEAR_NODE(&(lnode)->node);				\
+	(lnode)->prev = (lnode)->next = NULL;			\
+})
 
 extern void rb_insert_color(struct rb_node *, struct rb_root *);
 extern void rb_erase(struct rb_node *, struct rb_root *);
-
+extern bool rb_erase_linked(struct rb_node_linked *, struct rb_root_linked *);
 
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
@@ -213,15 +218,10 @@ rb_add_cached(struct rb_node *node, struct rb_root_cached *tree,
 	return leftmost ? node : NULL;
 }
 
-/**
- * rb_add() - insert @node into @tree
- * @node: node to insert
- * @tree: tree to insert @node into
- * @less: operator defining the (partial) node order
- */
 static __always_inline void
-rb_add(struct rb_node *node, struct rb_root *tree,
-       bool (*less)(struct rb_node *, const struct rb_node *))
+__rb_add(struct rb_node *node, struct rb_root *tree,
+	 bool (*less)(struct rb_node *, const struct rb_node *),
+	 void (*linkop)(struct rb_node *, struct rb_node *, struct rb_node **))
 {
 	struct rb_node **link = &tree->rb_node;
 	struct rb_node *parent = NULL;
@@ -234,10 +234,73 @@ rb_add(struct rb_node *node, struct rb_root *tree,
 		link = &parent->rb_right;
 	}
 
+	linkop(node, parent, link);
 	rb_link_node(node, parent, link);
 	rb_insert_color(node, tree);
 }
 
+#define __node_2_linked_node(_n) \
+	rb_entry((_n), struct rb_node_linked, node)
+
+static inline void
+rb_link_linked_node(struct rb_node *node, struct rb_node *parent, struct rb_node **link)
+{
+	if (!parent)
+		return;
+
+	struct rb_node_linked *nnew = __node_2_linked_node(node);
+	struct rb_node_linked *npar = __node_2_linked_node(parent);
+
+	if (link == &parent->rb_left) {
+		nnew->prev = npar->prev;
+		nnew->next = npar;
+		npar->prev = nnew;
+		if (nnew->prev)
+			nnew->prev->next = nnew;
+	} else {
+		nnew->next = npar->next;
+		nnew->prev = npar;
+		npar->next = nnew;
+		if (nnew->next)
+			nnew->next->prev = nnew;
+	}
+}
+
+/**
+ * rb_add_linked() - insert @node into the leftmost linked tree @tree
+ * @node: node to insert
+ * @tree: linked tree to insert @node into
+ * @less: operator defining the (partial) node order
+ *
+ * Returns @true when @node is the new leftmost, @false otherwise.
+ */
+static __always_inline bool
+rb_add_linked(struct rb_node_linked *node, struct rb_root_linked *tree,
+	      bool (*less)(struct rb_node *, const struct rb_node *))
+{
+	__rb_add(&node->node, &tree->rb_root, less, rb_link_linked_node);
+	if (!node->prev)
+		tree->rb_leftmost = node;
+	return !node->prev;
+}
+
+/* Empty linkop function which is optimized away by the compiler */
+static __always_inline void
+rb_link_noop(struct rb_node *n, struct rb_node *p, struct rb_node **l) { }
+
+/**
+ * rb_add() - insert @node into @tree
+ * @node: node to insert
+ * @tree: tree to insert @node into
+ * @less: operator defining the (partial) node order
+ */
+static __always_inline void
+rb_add(struct rb_node *node, struct rb_root *tree,
+       bool (*less)(struct rb_node *, const struct rb_node *))
+{
+	__rb_add(node, tree, less, rb_link_noop);
+}
+
 /**
  * rb_find_add_cached() - find equivalent @node in @tree, or add @node
  * @node: node to look-for / insert
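
A sketch of using the new linked tree (all names besides the rb_* API are invented):
elements are kept ordered by key, and an in-order walk follows the prev/next links
instead of the more expensive rb_next() pointer chasing.

struct demo_item {
	struct rb_node_linked	node;
	u64			key;
};

static bool demo_less(struct rb_node *a, const struct rb_node *b)
{
	const struct demo_item *ia = rb_entry(a, struct demo_item, node.node);
	const struct demo_item *ib = rb_entry(b, struct demo_item, node.node);

	return ia->key < ib->key;
}

static void demo_insert(struct rb_root_linked *tree, struct demo_item *item)
{
	/* rb_add_linked() returns true when @item became the new leftmost */
	if (rb_add_linked(&item->node, tree, demo_less))
		pr_debug("new first item: %llu\n", (unsigned long long)item->key);
}

static void demo_walk(struct rb_root_linked *tree)
{
	struct rb_node_linked *n;

	/* In-order traversal via the doubly linked list, no tree descent */
	for (n = tree->rb_leftmost; n; n = n->next)
		pr_debug("key=%llu\n",
			 (unsigned long long)rb_entry(n, struct demo_item, node)->key);
}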
diff --git a/include/linux/rbtree_types.h b/include/linux/rbtree_types.h
index 45b6ecde3665..3c7ae53e8139 100644
--- a/include/linux/rbtree_types.h
+++ b/include/linux/rbtree_types.h
@@ -9,6 +9,12 @@ struct rb_node {
 } __attribute__((aligned(sizeof(long))));
 /* The alignment might seem pointless, but allegedly CRIS needs it */
 
+struct rb_node_linked {
+	struct rb_node		node;
+	struct rb_node_linked	*prev;
+	struct rb_node_linked	*next;
+};
+
 struct rb_root {
 	struct rb_node *rb_node;
 };
@@ -28,7 +34,17 @@ struct rb_root_cached {
 	struct rb_node *rb_leftmost;
 };
 
+/*
+ * Leftmost tree with links. This would allow a trivial rb_rightmost update,
+ * but that has been omitted due to the lack of users.
+ */
+struct rb_root_linked {
+	struct rb_root		rb_root;
+	struct rb_node_linked	*rb_leftmost;
+};
+
 #define RB_ROOT (struct rb_root) { NULL, }
 #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
+#define RB_ROOT_LINKED (struct rb_root_linked) { {NULL, }, NULL }
 
 #endif
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index c6831c93cd6e..f11ebd34f8b9 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -40,6 +40,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
 #endif /* !CONFIG_RSEQ_STATS */
 
 #ifdef CONFIG_RSEQ
+#include <linux/hrtimer_rearm.h>
 #include <linux/jump_label.h>
 #include <linux/rseq.h>
 #include <linux/sched/signal.h>
@@ -110,7 +111,7 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t)
 	t->rseq.slice.state.granted = false;
 }
 
-static __always_inline bool rseq_grant_slice_extension(bool work_pending)
+static __always_inline bool __rseq_grant_slice_extension(bool work_pending)
 {
 	struct task_struct *curr = current;
 	struct rseq_slice_ctrl usr_ctrl;
@@ -215,11 +216,20 @@ efault:
 	return false;
 }
 
+static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask)
+{
+	if (unlikely(__rseq_grant_slice_extension(ti_work & mask))) {
+		hrtimer_rearm_deferred_tif(ti_work);
+		return true;
+	}
+	return false;
+}
+
 #else /* CONFIG_RSEQ_SLICE_EXTENSION */
 static __always_inline bool rseq_slice_extension_enabled(void) { return false; }
 static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; }
 static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { }
-static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
+static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; }
 #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
 
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
@@ -778,7 +788,7 @@ static inline void rseq_syscall_exit_to_user_mode(void) { }
 static inline void rseq_irqentry_exit_to_user_mode(void) { }
 static inline void rseq_exit_to_user_mode_legacy(void) { }
 static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
-static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
+static inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; }
 #endif /* !CONFIG_RSEQ */
 
 #endif /* _LINUX_RSEQ_ENTRY_H */
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index b8ae89ea28ab..e36d11e33e0c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -72,6 +72,10 @@ struct tk_read_base {
  * @id:			The timekeeper ID
  * @tkr_raw:		The readout base structure for CLOCK_MONOTONIC_RAW
  * @raw_sec:		CLOCK_MONOTONIC_RAW time in seconds
+ * @cs_id:		The ID of the current clocksource
+ * @cs_ns_to_cyc_mult:	Multiplier for nanoseconds to cycles conversion
+ * @cs_ns_to_cyc_shift:	Shift value for nanoseconds to cycles conversion
+ * @cs_ns_to_cyc_maxns:	Maximum nanoseconds to cycles conversion range
  * @clock_was_set_seq:	The sequence number of clock was set events
  * @cs_was_changed_seq:	The sequence number of clocksource change events
  * @clock_valid:	Indicator for valid clock
@@ -159,6 +163,10 @@ struct timekeeper {
 	u64			raw_sec;
 
 	/* Cacheline 3 and 4 (timekeeping internal variables): */
+	enum clocksource_ids	cs_id;
+	u32			cs_ns_to_cyc_mult;
+	u32			cs_ns_to_cyc_shift;
+	u64			cs_ns_to_cyc_maxns;
 	unsigned int		clock_was_set_seq;
 	u8			cs_was_changed_seq;
 	u8			clock_valid;
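
The new cs_ns_to_cyc_* fields support the inverse of the usual cycles-to-nanoseconds
scheme. A sketch of the conversion they imply follows; the helper name is invented,
and the clamp to cs_ns_to_cyc_maxns is my assumption of how multiply overflow is
avoided, mirroring the existing mult/shift conventions:

static inline u64 example_ns_to_cycles(const struct timekeeper *tk, u64 nsec)
{
	/* Clamp to the documented maximum conversion range */
	nsec = min(nsec, tk->cs_ns_to_cyc_maxns);
	/* Standard fixed-point scaling: cycles = ns * mult >> shift */
	return (nsec * tk->cs_ns_to_cyc_mult) >> tk->cs_ns_to_cyc_shift;
}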
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index d306d9dd2207..7d0aaa766580 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -5,12 +5,11 @@
 #include <linux/rbtree.h>
 #include <linux/timerqueue_types.h>
 
-extern bool timerqueue_add(struct timerqueue_head *head,
-			   struct timerqueue_node *node);
-extern bool timerqueue_del(struct timerqueue_head *head,
-			   struct timerqueue_node *node);
-extern struct timerqueue_node *timerqueue_iterate_next(
-						struct timerqueue_node *node);
+bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node);
+bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node);
+struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node);
+
+bool timerqueue_linked_add(struct timerqueue_linked_head *head, struct timerqueue_linked_node *node);
 
 /**
  * timerqueue_getnext - Returns the timer with the earliest expiration time
@@ -19,8 +18,7 @@ extern struct timerqueue_node *timerqueue_iterate_next(
  *
  * Returns a pointer to the timer node that has the earliest expiration time.
  */
-static inline
-struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
+static inline struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
 {
 	struct rb_node *leftmost = rb_first_cached(&head->rb_root);
 
@@ -41,4 +39,46 @@ static inline void timerqueue_init_head(struct timerqueue_head *head)
 {
 	head->rb_root = RB_ROOT_CACHED;
 }
+
+/* Timer queues with linked nodes */
+
+static __always_inline
+struct timerqueue_linked_node *timerqueue_linked_first(struct timerqueue_linked_head *head)
+{
+	return rb_entry_safe(head->rb_root.rb_leftmost, struct timerqueue_linked_node, node);
+}
+
+static __always_inline
+struct timerqueue_linked_node *timerqueue_linked_next(struct timerqueue_linked_node *node)
+{
+	return rb_entry_safe(node->node.next, struct timerqueue_linked_node, node);
+}
+
+static __always_inline
+struct timerqueue_linked_node *timerqueue_linked_prev(struct timerqueue_linked_node *node)
+{
+	return rb_entry_safe(node->node.prev, struct timerqueue_linked_node, node);
+}
+
+static __always_inline
+bool timerqueue_linked_del(struct timerqueue_linked_head *head, struct timerqueue_linked_node *node)
+{
+	return rb_erase_linked(&node->node, &head->rb_root);
+}
+
+static __always_inline void timerqueue_linked_init(struct timerqueue_linked_node *node)
+{
+	RB_CLEAR_LINKED_NODE(&node->node);
+}
+
+static __always_inline bool timerqueue_linked_node_queued(struct timerqueue_linked_node *node)
+{
+	return !RB_EMPTY_LINKED_NODE(&node->node);
+}
+
+static __always_inline void timerqueue_linked_init_head(struct timerqueue_linked_head *head)
+{
+	head->rb_root = RB_ROOT_LINKED;
+}
+
 #endif /* _LINUX_TIMERQUEUE_H */
diff --git a/include/linux/timerqueue_types.h b/include/linux/timerqueue_types.h
index dc298d0923e3..be2218b147c4 100644
--- a/include/linux/timerqueue_types.h
+++ b/include/linux/timerqueue_types.h
@@ -6,12 +6,21 @@
 #include <linux/types.h>
 
 struct timerqueue_node {
-	struct rb_node node;
-	ktime_t expires;
+	struct rb_node	node;
+	ktime_t		expires;
 };
 
 struct timerqueue_head {
-	struct rb_root_cached rb_root;
+	struct rb_root_cached	rb_root;
+};
+
+struct timerqueue_linked_node {
+	struct rb_node_linked	node;
+	ktime_t			expires;
+};
+
+struct timerqueue_linked_head {
+	struct rb_root_linked	rb_root;
 };
 
 #endif /* _LINUX_TIMERQUEUE_TYPES_H */
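
A sketch of draining expired nodes from a linked timerqueue using the helpers above;
the function name and the omitted expiry handling are illustrative only:

static void demo_expire_all(struct timerqueue_linked_head *head, ktime_t now)
{
	struct timerqueue_linked_node *node;

	/* The leftmost node is always the earliest expiring one */
	while ((node = timerqueue_linked_first(head))) {
		if (node->expires > now)
			break;
		timerqueue_linked_del(head, node);
		/* ... handle the expired node here ... */
	}
}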
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 37eb2f0f3dd8..40a43a4c7caf 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -22,20 +22,23 @@ union bpf_attr;
 
 const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
 				  unsigned long flags,
-				  const struct trace_print_flags *flag_array);
+				  const struct trace_print_flags *flag_array,
+				  size_t flag_array_size);
 
 const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
-				    const struct trace_print_flags *symbol_array);
+				    const struct trace_print_flags *symbol_array,
+				    size_t symbol_array_size);
 
 #if BITS_PER_LONG == 32
 const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
 				      unsigned long long flags,
-				      const struct trace_print_flags_u64 *flag_array);
+				      const struct trace_print_flags_u64 *flag_array,
+				      size_t flag_array_size);
 
 const char *trace_print_symbols_seq_u64(struct trace_seq *p,
 					unsigned long long val,
-					const struct trace_print_flags_u64 *symbol_array);
+					const struct trace_print_flags_u64 *symbol_array,
+					size_t symbol_array_size);
 #endif
 
 struct trace_iterator;
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 1641ae3e6ca0..07cbb9836b91 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -218,12 +218,13 @@ TRACE_EVENT(hrtimer_setup,
  * hrtimer_start - called when the hrtimer is started
  * @hrtimer:	pointer to struct hrtimer
  * @mode:	the hrtimers mode
+ * @was_armed:	Was armed when hrtimer_start*() was invoked
  */
 TRACE_EVENT(hrtimer_start,
 
-	TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode),
+	TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode, bool was_armed),
 
-	TP_ARGS(hrtimer, mode),
+	TP_ARGS(hrtimer, mode, was_armed),
 
 	TP_STRUCT__entry(
 		__field( void *,	hrtimer		)
@@ -231,6 +232,7 @@ TRACE_EVENT(hrtimer_start,
 		__field( s64,		expires		)
 		__field( s64,		softexpires	)
 		__field( enum hrtimer_mode,	mode	)
+		__field( bool,		was_armed	)
 	),
 
 	TP_fast_assign(
@@ -239,26 +241,26 @@ TRACE_EVENT(hrtimer_start,
 		__entry->expires = hrtimer_get_expires(hrtimer);
 		__entry->softexpires = hrtimer_get_softexpires(hrtimer);
 		__entry->mode = mode;
+		__entry->was_armed = was_armed;
 	),
 
 	TP_printk("hrtimer=%p function=%ps expires=%llu softexpires=%llu "
-		  "mode=%s", __entry->hrtimer, __entry->function,
+		  "mode=%s was_armed=%d", __entry->hrtimer, __entry->function,
 		  (unsigned long long) __entry->expires,
 		  (unsigned long long) __entry->softexpires,
-		  decode_hrtimer_mode(__entry->mode))
+		  decode_hrtimer_mode(__entry->mode), __entry->was_armed)
 );
 
 /**
  * hrtimer_expire_entry - called immediately before the hrtimer callback
  * @hrtimer:	pointer to struct hrtimer
- * @now:	pointer to variable which contains current time of the
- *		timers base.
+ * @now:	variable which contains current time of the timers base.
  *
  * Allows to determine the timer latency.
  */
 TRACE_EVENT(hrtimer_expire_entry,
 
-	TP_PROTO(struct hrtimer *hrtimer, ktime_t *now),
+	TP_PROTO(struct hrtimer *hrtimer, ktime_t now),
 
 	TP_ARGS(hrtimer, now),
 
@@ -270,7 +272,7 @@ TRACE_EVENT(hrtimer_expire_entry,
 
 	TP_fast_assign(
 		__entry->hrtimer	= hrtimer;
-		__entry->now		= *now;
+		__entry->now		= now;
 		__entry->function	= ACCESS_PRIVATE(hrtimer, function);
 	),
 
@@ -322,6 +324,30 @@ DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
 );
 
 /**
+ * hrtimer_rearm - Invoked when the clockevent device is rearmed
+ * @next_event:	The next expiry time (CLOCK_MONOTONIC)
+ */
+TRACE_EVENT(hrtimer_rearm,
+
+	TP_PROTO(ktime_t next_event, bool deferred),
+
+	TP_ARGS(next_event, deferred),
+
+	TP_STRUCT__entry(
+		__field( s64,	next_event	)
+		__field( bool,	deferred	)
+	),
+
+	TP_fast_assign(
+		__entry->next_event	= next_event;
+		__entry->deferred	= deferred;
+	),
+
+	TP_printk("next_event=%llu deferred=%d",
+		  (unsigned long long) __entry->next_event, __entry->deferred)
+);
+
+/**
  * itimer_state - called when itimer is started or canceled
  * @which:	name of the interval timer
  * @value:	the itimers value, itimer is canceled if value->it_value is
diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h
index fce85ea2df1c..b7d8ef4b9fe1 100644
--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -64,36 +64,36 @@
 #define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
 
 #undef __print_flags
-#define __print_flags(flag, delim, flag_array...)			\
-	({								\
-		static const struct trace_print_flags __flags[] =	\
-			{ flag_array, { -1, NULL }};			\
-		trace_print_flags_seq(p, delim, flag, __flags);		\
+#define __print_flags(flag, delim, flag_array...)			\
+	({								\
+		static const struct trace_print_flags __flags[] =	\
+			{ flag_array };					\
+		trace_print_flags_seq(p, delim, flag, __flags, ARRAY_SIZE(__flags)); \
 	})
 
 #undef __print_symbolic
-#define __print_symbolic(value, symbol_array...)			\
-	({								\
-		static const struct trace_print_flags symbols[] =	\
-			{ symbol_array, { -1, NULL }};			\
-		trace_print_symbols_seq(p, value, symbols);		\
+#define __print_symbolic(value, symbol_array...)			\
+	({								\
+		static const struct trace_print_flags symbols[] =	\
+			{ symbol_array };				\
+		trace_print_symbols_seq(p, value, symbols, ARRAY_SIZE(symbols)); \
 	})
 
 #undef __print_flags_u64
 #undef __print_symbolic_u64
 #if BITS_PER_LONG == 32
-#define __print_flags_u64(flag, delim, flag_array...)			\
-	({								\
-		static const struct trace_print_flags_u64 __flags[] =	\
-			{ flag_array, { -1, NULL } };			\
-		trace_print_flags_seq_u64(p, delim, flag, __flags);	\
+#define __print_flags_u64(flag, delim, flag_array...)			\
+	({								\
+		static const struct trace_print_flags_u64 __flags[] =	\
+			{ flag_array };					\
+		trace_print_flags_seq_u64(p, delim, flag, __flags, ARRAY_SIZE(__flags)); \
 	})
 
-#define __print_symbolic_u64(value, symbol_array...)			\
-	({								\
-		static const struct trace_print_flags_u64 symbols[] =	\
-			{ symbol_array, { -1, NULL } };			\
-		trace_print_symbols_seq_u64(p, value, symbols);		\
+#define __print_symbolic_u64(value, symbol_array...)			\
+	({								\
+		static const struct trace_print_flags_u64 symbols[] =	\
+			{ symbol_array };				\
+		trace_print_symbols_seq_u64(p, value, symbols, ARRAY_SIZE(symbols)); \
 	})
 #else
 #define __print_flags_u64(flag, delim, flag_array...)	\
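
Tracepoint definitions themselves are unaffected by the sentinel removal: the
{ -1, NULL } terminator is dropped and ARRAY_SIZE() passed entirely inside the macro
expansion. A fragment for illustration (the event field and strings are invented),
which compiles unchanged before and after this patch:

	TP_printk("state=%s",
		  __print_symbolic(__entry->state,
				   { 0, "inactive" },
				   { 1, "enqueued" }))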
