From edd3cb05c00a040dc72bed20b14b5ba865188bce Mon Sep 17 00:00:00 2001 From: Simon Schuster Date: Mon, 1 Sep 2025 15:09:51 +0200 Subject: copy_process: pass clone_flags as u64 across calltree With the introduction of clone3 in commit 7f192e3cd316 ("fork: add clone3") the effective bit width of clone_flags on all architectures was increased from 32-bit to 64-bit, with a new type of u64 for the flags. However, for most consumers of clone_flags the interface was not changed from the previous type of unsigned long. While this works fine as long as none of the new 64-bit flag bits (CLONE_CLEAR_SIGHAND and CLONE_INTO_CGROUP) are evaluated, this is still undesirable in terms of the principle of least surprise. Thus, this commit fixes all relevant interfaces of callees to sys_clone3/copy_process (excluding the architecture-specific copy_thread) to consistently pass clone_flags as u64, so that no truncation to 32-bit integers occurs on 32-bit architectures. Signed-off-by: Simon Schuster Link: https://lore.kernel.org/20250901-nios2-implement-clone3-v2-2-53fcf5577d57@siemens-energy.com Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Reviewed-by: Arnd Bergmann Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 667452768ed3..888872bcc5bb 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -130,7 +130,7 @@ fail: * * Return: timens_for_children namespace or ERR_PTR. */ -struct time_namespace *copy_time_ns(unsigned long flags, +struct time_namespace *copy_time_ns(u64 flags, struct user_namespace *user_ns, struct time_namespace *old_ns) { if (!(flags & CLONE_NEWTIME)) -- cgit v1.2.3 From 762af5a2aa0ad18da1316666dae30d369268d44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 25 Aug 2025 15:26:35 +0200 Subject: vdso/vsyscall: Avoid slow division loop in auxiliary clock update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to __iter_div_u64_rem() in vdso_time_update_aux() is a wrapper around subtraction. It cannot be used to divide large numbers, as that introduces long, computationally expensive delays. A regular u64 division is also not possible in the timekeeper update path as it can be too slow. Instead of splitting the ktime_t offset into into second and subsecond components during the timekeeper update fast-path, do it together with the adjustment of tk->offs_aux in the slow-path. Equivalent to the handling of offs_boot and monotonic_to_boot. Reuse the storage of monotonic_to_boot for the new field, as it is not used by auxiliary timekeepers. Fixes: 380b84e168e5 ("vdso/vsyscall: Update auxiliary clock data in the datapage") Reported-by: Miroslav Lichvar Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250825-vdso-auxclock-division-v1-1-a1d32a16a313@linutronix.de Closes: https://lore.kernel.org/lkml/aKwsNNWsHJg8IKzj@localhost/ --- kernel/time/timekeeping.c | 10 ++++++++-- kernel/time/vsyscall.c | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 059fa8b79be6..b6974fce800c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -83,6 +83,12 @@ static inline bool tk_is_aux(const struct timekeeper *tk) } #endif +static inline void tk_update_aux_offs(struct timekeeper *tk, ktime_t offs) +{ + tk->offs_aux = offs; + tk->monotonic_to_aux = ktime_to_timespec64(offs); +} + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -1506,7 +1512,7 @@ static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespe timekeeping_restore_shadow(tkd); return -EINVAL; } - tks->offs_aux = offs; + tk_update_aux_offs(tks, offs); } timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); @@ -2937,7 +2943,7 @@ static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew) * xtime ("realtime") is not applicable for auxiliary clocks and * kept in sync with "monotonic". */ - aux_tks->offs_aux = ktime_sub(timespec64_to_ktime(*tnew), tnow); + tk_update_aux_offs(aux_tks, ktime_sub(timespec64_to_ktime(*tnew), tnow)); timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); return 0; diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 8ba8b0d8a387..aa59919b8f2c 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -159,10 +159,10 @@ void vdso_time_update_aux(struct timekeeper *tk) if (clock_mode != VDSO_CLOCKMODE_NONE) { fill_clock_configuration(vc, &tk->tkr_mono); - vdso_ts->sec = tk->xtime_sec; + vdso_ts->sec = tk->xtime_sec + tk->monotonic_to_aux.tv_sec; nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - nsec += tk->offs_aux; + nsec += tk->monotonic_to_aux.tv_nsec; vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); nsec = nsec << tk->tkr_mono.shift; vdso_ts->nsec = nsec; -- cgit v1.2.3 From 96c88268b79bc0af2014b8732a438a7afc4fff0d Mon Sep 17 00:00:00 2001 From: Gatien Chevallier Date: Mon, 1 Sep 2025 11:16:27 +0200 Subject: time: export timespec64_add_safe() symbol Export the timespec64_add_safe() symbol so that this function can be used in modules where computation of time related is done. Signed-off-by: Gatien Chevallier Acked-by: Thomas Gleixner Link: https://patch.msgid.link/20250901-relative_flex_pps-v4-1-b874971dfe85@foss.st.com Signed-off-by: Jakub Kicinski --- kernel/time/time.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/time') diff --git a/kernel/time/time.c b/kernel/time/time.c index 1b69caa87480..0ba8e3c50d62 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -858,6 +858,7 @@ struct timespec64 timespec64_add_safe(const struct timespec64 lhs, return res; } +EXPORT_SYMBOL_GPL(timespec64_add_safe); /** * get_timespec64 - get user's time value into kernel space -- cgit v1.2.3 From ea1a1fa919a5b4f39fa46073e7b3a19b12521f05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:08 +0200 Subject: time: Build generic update_vsyscall() only with generic time vDSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic vDSO can be used without the time-related functionality. In that case the generic update_vsyscall() from kernel/time/vsyscall.c should not be built. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-5-d9b65750e49f@linutronix.de --- kernel/time/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/Makefile b/kernel/time/Makefile index e6e9b85d4db5..f7d52d9543cc 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_LEGACY_TIMER_TICK) += tick-legacy.o ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_NO_HZ_COMMON) += timer_migration.o endif -obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o +obj-$(CONFIG_GENERIC_GETTIMEOFDAY) += vsyscall.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o obj-$(CONFIG_TIME_NS) += namespace.o -- cgit v1.2.3 From 24fb08dcc40f52cf5f95d2cdaa0c26e33a2f4285 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:08 +0200 Subject: posix-timers: Avoid direct access to hrtimer clockbase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The field timer->base->get_time is a private implementation detail and should not be accessed outside of the hrtimer core. Switch to the equivalent helpers. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-1-3ae822e5bfbd@linutronix.de --- kernel/time/posix-timers.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 8b582174b1f9..2741f3725de4 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -299,8 +299,7 @@ static void common_hrtimer_rearm(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; - timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), - timr->it_interval); + timr->it_overrun += hrtimer_forward_now(timer, timr->it_interval); hrtimer_restart(timer); } @@ -825,7 +824,7 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, hrtimer_setup(&timr->it.real.timer, posix_timer_fn, timr->it_clock, mode); if (!absolute) - expires = ktime_add_safe(expires, timer->base->get_time()); + expires = ktime_add_safe(expires, hrtimer_cb_get_time(timer)); hrtimer_set_expires(timer, expires); if (!sigev_none) -- cgit v1.2.3 From 5f531fe9cb489bf63f71f6e5eee6420c57fbc049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:09 +0200 Subject: timers/itimer: Avoid direct access to hrtimer clockbase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The field timer->base->get_time is a private implementation detail and should not be accessed outside of the hrtimer core. Switch to the equivalent helper. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-2-3ae822e5bfbd@linutronix.de --- kernel/time/itimer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 876d389b2e21..7c6110e964e7 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -163,8 +163,7 @@ void posixtimer_rearm_itimer(struct task_struct *tsk) struct hrtimer *tmr = &tsk->signal->real_timer; if (!hrtimer_is_queued(tmr) && tsk->signal->it_real_incr != 0) { - hrtimer_forward(tmr, tmr->base->get_time(), - tsk->signal->it_real_incr); + hrtimer_forward_now(tmr, tsk->signal->it_real_incr); hrtimer_restart(tmr); } } -- cgit v1.2.3 From 009eb5da29a91016e3ebb988e6401e79411be7a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:15 +0200 Subject: hrtimer: Remove hrtimer_clock_base:: Get_time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get_time() callbacks always need to match the bases clockid. Instead of maintaining that association twice in hrtimer_bases, use a helper. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-8-3ae822e5bfbd@linutronix.de --- kernel/time/hrtimer.c | 34 +++++++++++++++++++++++++--------- kernel/time/timer_list.c | 2 -- 2 files changed, 25 insertions(+), 11 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 30899a8cc52c..fedd1d793f6c 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -59,6 +59,7 @@ #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) static void retrigger_next_event(void *arg); +static ktime_t __hrtimer_cb_get_time(clockid_t clock_id); /* * The timer bases: @@ -76,42 +77,34 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { .index = HRTIMER_BASE_MONOTONIC, .clockid = CLOCK_MONOTONIC, - .get_time = &ktime_get, }, { .index = HRTIMER_BASE_REALTIME, .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, }, { .index = HRTIMER_BASE_BOOTTIME, .clockid = CLOCK_BOOTTIME, - .get_time = &ktime_get_boottime, }, { .index = HRTIMER_BASE_TAI, .clockid = CLOCK_TAI, - .get_time = &ktime_get_clocktai, }, { .index = HRTIMER_BASE_MONOTONIC_SOFT, .clockid = CLOCK_MONOTONIC, - .get_time = &ktime_get, }, { .index = HRTIMER_BASE_REALTIME_SOFT, .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, }, { .index = HRTIMER_BASE_BOOTTIME_SOFT, .clockid = CLOCK_BOOTTIME, - .get_time = &ktime_get_boottime, }, { .index = HRTIMER_BASE_TAI_SOFT, .clockid = CLOCK_TAI, - .get_time = &ktime_get_clocktai, }, }, .csd = CSD_INIT(retrigger_next_event, NULL) @@ -1253,7 +1246,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, remove_hrtimer(timer, base, true, force_local); if (mode & HRTIMER_MODE_REL) - tim = ktime_add_safe(tim, base->get_time()); + tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid)); tim = hrtimer_update_lowres(timer, tim, mode); @@ -1588,6 +1581,29 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id) } } +static ktime_t __hrtimer_cb_get_time(clockid_t clock_id) +{ + switch (clock_id) { + case CLOCK_MONOTONIC: + return ktime_get(); + case CLOCK_REALTIME: + return ktime_get_real(); + case CLOCK_BOOTTIME: + return ktime_get_boottime(); + case CLOCK_TAI: + return ktime_get_clocktai(); + default: + WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); + return ktime_get(); + } +} + +ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) +{ + return __hrtimer_cb_get_time(timer->base->clockid); +} +EXPORT_SYMBOL_GPL(hrtimer_cb_get_time); + static void __hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode) diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index b03d0ada6469..488e47e96e93 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -102,8 +102,6 @@ print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) SEQ_printf(m, " .index: %d\n", base->index); SEQ_printf(m, " .resolution: %u nsecs\n", hrtimer_resolution); - - SEQ_printf(m, " .get_time: %ps\n", base->get_time); #ifdef CONFIG_HIGH_RES_TIMERS SEQ_printf(m, " .offset: %Lu nsecs\n", (unsigned long long) ktime_to_ns(base->offset)); -- cgit v1.2.3 From 3c3af563b31766f67106c7549ad084a08ef613f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:16 +0200 Subject: hrtimer: Reorder branches in hrtimer_clockid_to_base() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align the ordering to the one used for hrtimer_bases. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-9-3ae822e5bfbd@linutronix.de --- kernel/time/hrtimer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index fedd1d793f6c..f383df28c532 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1567,10 +1567,10 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude) static inline int hrtimer_clockid_to_base(clockid_t clock_id) { switch (clock_id) { - case CLOCK_REALTIME: - return HRTIMER_BASE_REALTIME; case CLOCK_MONOTONIC: return HRTIMER_BASE_MONOTONIC; + case CLOCK_REALTIME: + return HRTIMER_BASE_REALTIME; case CLOCK_BOOTTIME: return HRTIMER_BASE_BOOTTIME; case CLOCK_TAI: -- cgit v1.2.3 From fe2a449a45b13df1562419e0104b4777b6ea5248 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 6 Sep 2025 14:49:51 +0800 Subject: tick: Do not set device to detached state in tick_shutdown() tick_shutdown() sets the state of the clockevent device to detached first and the invokes clockevents_exchange_device(), which in turn invokes clockevents_switch_state(). But clockevents_switch_state() returns without invoking the device shutdown callback as the device is already in detached state. As a consequence the timer device is not shutdown when a CPU goes offline. tick_shutdown() does this because it was originally invoked on a online CPU and not on the outgoing CPU. It therefore could not access the clockevent device of the already offlined CPU and just set the state. Since commit 3b1596a21fbf tick_shutdown() is called on the outgoing CPU, so the hardware device can be accessed. Remove the state set before calling clockevents_exchange_device(), so that the subsequent clockevents_switch_state() handles the state transition and invokes the shutdown callback of the clockevent device. [ tglx: Massaged change log ] Fixes: 3b1596a21fbf ("clockevents: Shutdown and unregister current clockevents at CPUHP_AP_TICK_DYING") Signed-off-by: Bibo Mao Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/all/20250906064952.3749122-2-maobibo@loongson.cn --- kernel/time/clockevents.c | 2 +- kernel/time/tick-common.c | 16 +++++----------- kernel/time/tick-internal.h | 2 +- 3 files changed, 7 insertions(+), 13 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index f3e831f62906..a59bc75ab7c5 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -633,7 +633,7 @@ void tick_offline_cpu(unsigned int cpu) raw_spin_lock(&clockevents_lock); tick_broadcast_offline(cpu); - tick_shutdown(cpu); + tick_shutdown(); /* * Unregister the clock event devices which were diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 9a3859443c04..7e33d3f2e889 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -411,24 +411,18 @@ int tick_cpu_dying(unsigned int dying_cpu) } /* - * Shutdown an event device on a given cpu: + * Shutdown an event device on the outgoing CPU: * - * This is called on a life CPU, when a CPU is dead. So we cannot - * access the hardware device itself. - * We just set the mode and remove it from the lists. + * Called by the dying CPU during teardown, with clockevents_lock held + * and interrupts disabled. */ -void tick_shutdown(unsigned int cpu) +void tick_shutdown(void) { - struct tick_device *td = &per_cpu(tick_cpu_device, cpu); + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *dev = td->evtdev; td->mode = TICKDEV_MODE_PERIODIC; if (dev) { - /* - * Prevent that the clock events layer tries to call - * the set mode function! - */ - clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index faac36de35b9..4e4f7bbe2a64 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -26,7 +26,7 @@ extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); extern void tick_offline_cpu(unsigned int cpu); -extern void tick_shutdown(unsigned int cpu); +extern void tick_shutdown(void); extern void tick_suspend(void); extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, -- cgit v1.2.3 From e895f8e29119c8c966ea794af9e9100b10becb88 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 5 Aug 2025 16:10:25 +0800 Subject: hrtimers: Unconditionally update target CPU base after offline timer migration When testing softirq based hrtimers on an ARM32 board, with high resolution mode and NOHZ inactive, softirq based hrtimers fail to expire after being moved away from an offline CPU: CPU0 CPU1 hrtimer_start(..., HRTIMER_MODE_SOFT); cpu_down(CPU1) ... hrtimers_cpu_dying() // Migrate timers to CPU0 smp_call_function_single(CPU0, returgger_next_event); retrigger_next_event() if (!highres && !nohz) return; As retrigger_next_event() is a NOOP when both high resolution timers and NOHZ are inactive CPU0's hrtimer_cpu_base::softirq_expires_next is not updated and the migrated softirq timers never expire unless there is a softirq based hrtimer queued on CPU0 later. Fix this by removing the hrtimer_hres_active() and tick_nohz_active() check in retrigger_next_event(), which enforces a full update of the CPU base. As this is not a fast path the extra cost does not matter. [ tglx: Massaged change log ] Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Co-developed-by: Frederic Weisbecker Signed-off-by: Frederic Weisbecker Signed-off-by: Xiongfeng Wang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250805081025.54235-1-wangxiongfeng2@huawei.com --- kernel/time/hrtimer.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 30899a8cc52c..e8c479329282 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -787,10 +787,10 @@ static void retrigger_next_event(void *arg) * of the next expiring timer is enough. The return from the SMP * function call will take care of the reprogramming in case the * CPU was in a NOHZ idle sleep. + * + * In periodic low resolution mode, the next softirq expiration + * must also be updated. */ - if (!hrtimer_hres_active(base) && !tick_nohz_active) - return; - raw_spin_lock(&base->lock); hrtimer_update_base(base); if (hrtimer_hres_active(base)) @@ -2295,11 +2295,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) &new_base->clock_base[i]); } - /* - * The migration might have changed the first expiring softirq - * timer on this CPU. Update it. - */ - __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); /* Tell the other CPU to retrigger the next event */ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); -- cgit v1.2.3 From b9aa93aa5185aee76c4c7a5ba4432b4d0d15f797 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Thu, 31 Jul 2025 18:18:37 +0200 Subject: clocksource: Print durations for sync check unconditionally A typical set of messages that gets printed as a result of the clocksource watchdog finding the TSC unstable usually does not contain messages indicating CPUs being ahead of or behind the CPU from which the check is carried out. That fact suggests that the TSC does not experience time skew between CPUs (if the clocksource.verify_n_cpus parameter is set to a negative value) but quantitative information is missing. The cs_nsec_max value printed by the "CPU %d check durations" message actually provides a worst case estimate of the time skew. If all CPUs have been checked, the cs_nsec_max value multiplied by 2 is the maximum possible time skew between the TSCs of any two CPUs on the system. The worst case estimate is derived from two boundary cases: 1. No time is consumed to execute instructions between csnow_begin and csnow_mid while all the cs_nsec_max time is consumed by the code between csnow_mid and csnow_end. In this case, the maximum undetectable time skew of a CPU being ahead would be cs_nsec_max. 2. All the cs_nsec_max time is consumed to execute instructions between csnow_begin and csnow_mid while no time is consumed by the code between csnow_mid and csnow_end. In this case, the maximum undetectable time skew of a CPU being behind would be cs_nsec_max. The worst case estimate assumes a system experiencing a corner case consisting of the two boundary cases. Always print the "CPU %d check durations" message so that the maximum possible time skew measured by the TSC sync check can be compared to the time skew measured by the clocksource watchdog. Signed-off-by: Jiri Wiesner Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Link: https://lore.kernel.org/all/aIuXXfdITXdI0lLp@incl --- kernel/time/clocksource.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 0aef0e349e49..3edb01db3aa1 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -407,9 +407,8 @@ void clocksource_verify_percpu(struct clocksource *cs) if (!cpumask_empty(&cpus_behind)) pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n", cpumask_pr_args(&cpus_behind), testcpu, cs->name); - if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) - pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n", - testcpu, cs_nsec_min, cs_nsec_max, cs->name); + pr_info(" CPU %d check durations %lldns - %lldns for clocksource %s.\n", + testcpu, cs_nsec_min, cs_nsec_max, cs->name); } EXPORT_SYMBOL_GPL(clocksource_verify_percpu); -- cgit v1.2.3 From 7b0e2c83624b02a8a11f04f0581721d95ea6e4a6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:36 +0200 Subject: time: use ns_common_init() Don't cargo-cult the same thing over and over. Reviewed-by: Jan Kara Reviewed-by: Thomas Gleixner Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 667452768ed3..0be93d8f2896 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -88,22 +88,19 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, goto fail; err = -ENOMEM; - ns = kmalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT); + ns = kzalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT); if (!ns) goto fail_dec; - refcount_set(&ns->ns.count, 1); - ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!ns->vvar_page) goto fail_free; - err = ns_alloc_inum(&ns->ns); + err = ns_common_init(&ns->ns, &timens_operations, true); if (err) goto fail_free_page; ns->ucounts = ucounts; - ns->ns.ops = &timens_operations; ns->user_ns = get_user_ns(user_ns); ns->offsets = old_ns->offsets; ns->frozen_offsets = false; -- cgit v1.2.3 From b36c823b9a4be5b0c8e38c3fd60cade7d41c216c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:46 +0200 Subject: time: support ns lookup Support the generic ns lookup infrastructure to support file handles for namespaces. Reviewed-by: Thomas Gleixner Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 0be93d8f2896..408f60d0a3b6 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -104,6 +105,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, ns->user_ns = get_user_ns(user_ns); ns->offsets = old_ns->offsets; ns->frozen_offsets = false; + ns_tree_add(ns); return ns; fail_free_page: @@ -250,11 +252,13 @@ out: void free_time_ns(struct time_namespace *ns) { + ns_tree_remove(ns); dec_time_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); __free_page(ns->vvar_page); - kfree(ns); + /* Concurrent nstree traversal depends on a grace period. */ + kfree_rcu(ns, ns.ns_rcu); } static struct time_namespace *to_time_ns(struct ns_common *ns) @@ -487,3 +491,8 @@ struct time_namespace init_time_ns = { .ns.ops = &timens_operations, .frozen_offsets = true, }; + +void __init time_ns_init(void) +{ + ns_tree_add(&init_time_ns); +} -- cgit v1.2.3 From d7afdf889561058068ab46fd8f306c70ef29216a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:49 +0200 Subject: ns: add to__ns() to respective headers Every namespace type has a container_of(ns, , ns) static inline function that is currently not exposed in the header. So we have a bunch of places that open-code it via container_of(). Move it to the headers so we can use it directly. Reviewed-by: Aleksa Sarai Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 408f60d0a3b6..20b65f90549e 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -261,11 +261,6 @@ void free_time_ns(struct time_namespace *ns) kfree_rcu(ns, ns.ns_rcu); } -static struct time_namespace *to_time_ns(struct ns_common *ns) -{ - return container_of(ns, struct time_namespace, ns); -} - static struct ns_common *timens_get(struct task_struct *task) { struct time_namespace *ns = NULL; -- cgit v1.2.3 From 5612ff3ec588be09f11a9424db6d1186bcdeb3fa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:07 +0200 Subject: nscommon: simplify initialization There's a lot of information that namespace implementers don't need to know about at all. Encapsulate this all in the initialization helper. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 20b65f90549e..ce8e952104a7 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -97,7 +97,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, if (!ns->vvar_page) goto fail_free; - err = ns_common_init(&ns->ns, &timens_operations, true); + err = ns_common_init(ns, &timens_operations); if (err) goto fail_free_page; -- cgit v1.2.3 From be5f21d3985f00827e09b798f7a07ebd6dd7f54a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:08 +0200 Subject: ns: add ns_common_free() And drop ns_free_inum(). Anything common that can be wasted centrally should be wasted in the new common helper. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index ce8e952104a7..d49c73015d6e 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -255,7 +255,7 @@ void free_time_ns(struct time_namespace *ns) ns_tree_remove(ns); dec_time_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); + ns_common_free(ns); __free_page(ns->vvar_page); /* Concurrent nstree traversal depends on a grace period. */ kfree_rcu(ns, ns.ns_rcu); -- cgit v1.2.3 From 024596a4e2802e457a9f92af79f246fa9631f8de Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:59 +0200 Subject: ns: rename to __ns_ref Make it easier to grep and rename to ns_count. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index d49c73015d6e..d70bdfb7b001 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -480,7 +480,7 @@ const struct proc_ns_operations timens_for_children_operations = { }; struct time_namespace init_time_ns = { - .ns.count = REFCOUNT_INIT(3), + .ns.__ns_ref = REFCOUNT_INIT(3), .user_ns = &init_user_ns, .ns.inum = PROC_TIME_INIT_INO, .ns.ops = &timens_operations, -- cgit v1.2.3 From 7cf730321132e726ff949c6f3c0d5c598788f7a2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 19 Sep 2025 11:29:49 +0200 Subject: ns: use inode initializer for initial namespaces Just use the common helper we have. Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index d70bdfb7b001..7aa4d6fedd49 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -482,7 +482,7 @@ const struct proc_ns_operations timens_for_children_operations = { struct time_namespace init_time_ns = { .ns.__ns_ref = REFCOUNT_INIT(3), .user_ns = &init_user_ns, - .ns.inum = PROC_TIME_INIT_INO, + .ns.inum = ns_init_inum(&init_time_ns), .ns.ops = &timens_operations, .frozen_offsets = true, }; -- cgit v1.2.3 From 391253b25f078d2fe5657a1dedd360396d186407 Mon Sep 17 00:00:00 2001 From: Haofeng Li Date: Wed, 10 Sep 2025 17:37:03 +0800 Subject: time: Fix spelling mistakes in comments Correct several typos found in comments across various files in the kernel/time directory. No functional changes are introduced by these corrections. Signed-off-by: Haofeng Li Signed-off-by: Thomas Gleixner --- kernel/time/alarmtimer.c | 2 +- kernel/time/clocksource.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/posix-timers.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 577f0e6842d4..069d93bfb0c7 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -35,7 +35,7 @@ /** * struct alarm_base - Alarm timer bases - * @lock: Lock for syncrhonized access to the base + * @lock: Lock for synchronized access to the base * @timerqueue: Timerqueue head managing the list of events * @get_ktime: Function to read the time correlating to the base * @get_timespec: Function to read the namespace time correlating to the base diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 3edb01db3aa1..a1890a073196 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -144,7 +144,7 @@ static u64 suspend_start; * Default for maximum permissible skew when cs->uncertainty_margin is * not specified, and the lower bound even when cs->uncertainty_margin * is specified. This is also the default that is used when registering - * clocks with unspecifed cs->uncertainty_margin, so this macro is used + * clocks with unspecified cs->uncertainty_margin, so this macro is used * even in CONFIG_CLOCKSOURCE_WATCHDOG=n kernels. */ #define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f383df28c532..7e7b2b471bae 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -201,7 +201,7 @@ static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_ /* * The offline local CPU can't be the default target if the * next remote target event is after this timer. Keep the - * elected new base. An IPI will we issued to reprogram + * elected new base. An IPI will be issued to reprogram * it as a last resort. */ if (!hrtimer_base_is_online(this_cpu_base)) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2741f3725de4..aa3120104a51 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -534,7 +534,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, goto out; } /* - * After succesful copy out, the timer ID is visible to user space + * After successful copy out, the timer ID is visible to user space * now but not yet valid because new_timer::signal low order bit is 1. * * Complete the initialization with the clock specific create -- cgit v1.2.3 From d7610cb7454bbd8bf6d58f71b0ed57155d3c545f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 22 Sep 2025 14:42:36 +0200 Subject: ns: simplify ns_common_init() further Simply derive the ns operations from the namespace type. Acked-by: Thomas Gleixner Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 7aa4d6fedd49..9f26e61be044 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -97,7 +97,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, if (!ns->vvar_page) goto fail_free; - err = ns_common_init(ns, &timens_operations); + err = ns_common_init(ns); if (err) goto fail_free_page; -- cgit v1.2.3 From 84b1a903aed876a3fd6bb04786947f640d4d8e62 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 2 Jun 2025 17:18:51 +0200 Subject: time/sched_clock: Export symbol for sched_clock register function The timer drivers could be converted into modules. The different functions to register the clocksource or the clockevent are already exporting their symbols for modules but the sched_clock_register() function is missing. Export the symbols so the drivers using this function can be converted into modules. Signed-off-by: Daniel Lezcano Reviewed-by: Thomas Gleixner Reviewed-by: Carlos Llamas Reviewed-by: Will McVicker Acked-by: John Stultz Link: https://lore.kernel.org/r/20250602151853.1942521-8-daniel.lezcano@linaro.org --- kernel/time/sched_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index cc15fe293719..cc1afec306b3 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -174,8 +174,7 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) return HRTIMER_RESTART; } -void __init -sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) +void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; @@ -247,6 +246,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) pr_debug("Registered %pS as sched_clock source\n", read); } +EXPORT_SYMBOL_GPL(sched_clock_register); void __init generic_sched_clock_init(void) { -- cgit v1.2.3 From 4055526d35746ce8b04bfa5e14e14f28bb163186 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 24 Sep 2025 13:33:59 +0200 Subject: ns: move ns type into struct ns_common It's misplaced in struct proc_ns_operations and ns->ops might be NULL if the namespace is compiled out but we still want to know the type of the namespace for the initial namespace struct. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- kernel/time/namespace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 9f26e61be044..530cf99c2212 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -462,7 +462,6 @@ out: const struct proc_ns_operations timens_operations = { .name = "time", - .type = CLONE_NEWTIME, .get = timens_get, .put = timens_put, .install = timens_install, @@ -472,7 +471,6 @@ const struct proc_ns_operations timens_operations = { const struct proc_ns_operations timens_for_children_operations = { .name = "time_for_children", .real_ns_name = "time", - .type = CLONE_NEWTIME, .get = timens_for_children_get, .put = timens_put, .install = timens_install, @@ -480,6 +478,7 @@ const struct proc_ns_operations timens_for_children_operations = { }; struct time_namespace init_time_ns = { + .ns.ns_type = ns_common_type(&init_time_ns), .ns.__ns_ref = REFCOUNT_INIT(3), .user_ns = &init_user_ns, .ns.inum = ns_init_inum(&init_time_ns), -- cgit v1.2.3 From 39a9ed0fb6dac58547afdf9b6cb032d326a3698f Mon Sep 17 00:00:00 2001 From: Haofeng Li Date: Wed, 15 Oct 2025 14:17:53 +0800 Subject: timekeeping: Fix aux clocks sysfs initialization loop bound The loop in tk_aux_sysfs_init() uses `i <= MAX_AUX_CLOCKS` as the termination condition, which results in 9 iterations (i=0 to 8) when MAX_AUX_CLOCKS is defined as 8. However, the kernel is designed to support only up to 8 auxiliary clocks. This off-by-one error causes the creation of a 9th sysfs entry that exceeds the intended auxiliary clock range. Fix the loop bound to use `i < MAX_AUX_CLOCKS` to ensure exactly 8 auxiliary clock entries are created, matching the design specification. Fixes: 7b95663a3d96 ("timekeeping: Provide interface to control auxiliary clocks") Signed-off-by: Haofeng Li Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/tencent_2376993D9FC06A3616A4F981B3DE1C599607@qq.com --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b6974fce800c..3a4d3b2e3f74 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -3070,7 +3070,7 @@ static int __init tk_aux_sysfs_init(void) return -ENOMEM; } - for (int i = 0; i <= MAX_AUX_CLOCKS; i++) { + for (int i = 0; i < MAX_AUX_CLOCKS; i++) { char id[2] = { [0] = '0' + i, }; struct kobject *clk = kobject_create_and_add(id, auxo); -- cgit v1.2.3