From 990518eb3a71c357ca4ff1ad3e747fb844d8094c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:15 +0200 Subject: timekeeping: Remove hardcoded access to tk_core This was overlooked in the initial conversion. Use the provided pointer to access the shadow timekeeper. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083025.652611452@linutronix.de --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a009c91f7b05..2ad78fbdc9ff 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -663,7 +663,7 @@ static void timekeeping_restore_shadow(struct tk_data *tkd) static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action) { - struct timekeeper *tk = &tk_core.shadow_timekeeper; + struct timekeeper *tk = &tkd->shadow_timekeeper; lockdep_assert_held(&tkd->lock); -- cgit v1.2.3 From 506a54a0316ee4854b0ed113a8001477f5211d50 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:16 +0200 Subject: timekeeping: Cleanup kernel doc of __ktime_get_real_seconds() Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083025.715836017@linutronix.de --- kernel/time/timekeeping.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2ad78fbdc9ff..d88d19fb794c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -975,9 +975,14 @@ time64_t ktime_get_real_seconds(void) EXPORT_SYMBOL_GPL(ktime_get_real_seconds); /** - * __ktime_get_real_seconds - The same as ktime_get_real_seconds - * but without the sequence counter protect. This internal function - * is called just when timekeeping lock is already held. + * __ktime_get_real_seconds - Unprotected access to CLOCK_REALTIME seconds + * + * The same as ktime_get_real_seconds() but without the sequence counter + * protection. This function is used in restricted contexts like the x86 MCE + * handler and in KGDB. It's unprotected on 32-bit vs. concurrent half + * completed modification and only to be used for such critical contexts. + * + * Returns: Racy snapshot of the CLOCK_REALTIME seconds value */ noinstr time64_t __ktime_get_real_seconds(void) { -- cgit v1.2.3 From 7e55b6ba1fe6987638160e5f8216288f38043759 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:17 +0200 Subject: timekeeping: Avoid double notification in do_adjtimex() Consolidate do_adjtimex() so that it does not notify about clock changes twice. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250519083025.779267274@linutronix.de --- kernel/time/timekeeping.c | 98 +++++++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 42 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d88d19fb794c..fb1da87a92f1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1418,40 +1418,49 @@ int do_settimeofday64(const struct timespec64 *ts) EXPORT_SYMBOL(do_settimeofday64); /** - * timekeeping_inject_offset - Adds or subtracts from the current time. + * __timekeeping_inject_offset - Adds or subtracts from the current time. * @ts: Pointer to the timespec variable containing the offset * * Adds or subtracts an offset value from the current time. */ -static int timekeeping_inject_offset(const struct timespec64 *ts) +static int __timekeeping_inject_offset(const struct timespec64 *ts) { + struct timekeeper *tks = &tk_core.shadow_timekeeper; + struct timespec64 tmp; + if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { - struct timekeeper *tks = &tk_core.shadow_timekeeper; - struct timespec64 tmp; - timekeeping_forward_now(tks); - - /* Make sure the proposed value is valid */ - tmp = timespec64_add(tk_xtime(tks), *ts); - if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || - !timespec64_valid_settod(&tmp)) { - timekeeping_restore_shadow(&tk_core); - return -EINVAL; - } + timekeeping_forward_now(tks); - tk_xtime_add(tks, ts); - tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); - timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); + /* Make sure the proposed value is valid */ + tmp = timespec64_add(tk_xtime(tks), *ts); + if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || + !timespec64_valid_settod(&tmp)) { + timekeeping_restore_shadow(&tk_core); + return -EINVAL; } - /* Signal hrtimers about time change */ - clock_was_set(CLOCK_SET_WALL); + tk_xtime_add(tks, ts); + tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); + timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); return 0; } +static int timekeeping_inject_offset(const struct timespec64 *ts) +{ + int ret; + + scoped_guard (raw_spinlock_irqsave, &tk_core.lock) + ret = __timekeeping_inject_offset(ts); + + /* Signal hrtimers about time change */ + if (!ret) + clock_was_set(CLOCK_SET_WALL); + return ret; +} + /* * Indicates if there is an offset between the system clock and the hardware * clock/persistent clock/rtc. @@ -2186,7 +2195,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, * timekeeping_advance - Updates the timekeeper to the current time and * current NTP tick length */ -static bool timekeeping_advance(enum timekeeping_adv_mode mode) +static bool __timekeeping_advance(enum timekeeping_adv_mode mode) { struct timekeeper *tk = &tk_core.shadow_timekeeper; struct timekeeper *real_tk = &tk_core.timekeeper; @@ -2194,8 +2203,6 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) int shift = 0, maxshift; u64 offset, orig_offset; - guard(raw_spinlock_irqsave)(&tk_core.lock); - /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) return false; @@ -2249,6 +2256,12 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) return !!clock_set; } +static bool timekeeping_advance(enum timekeeping_adv_mode mode) +{ + guard(raw_spinlock_irqsave)(&tk_core.lock); + return __timekeeping_advance(mode); +} + /** * update_wall_time - Uses the current clocksource to increment the wall time * @@ -2537,10 +2550,10 @@ EXPORT_SYMBOL_GPL(random_get_entropy_fallback); */ int do_adjtimex(struct __kernel_timex *txc) { + struct timespec64 delta, ts; struct audit_ntp_data ad; bool offset_set = false; bool clock_set = false; - struct timespec64 ts; int ret; /* Validate the data before disabling interrupts */ @@ -2549,21 +2562,6 @@ int do_adjtimex(struct __kernel_timex *txc) return ret; add_device_randomness(txc, sizeof(*txc)); - if (txc->modes & ADJ_SETOFFSET) { - struct timespec64 delta; - - delta.tv_sec = txc->time.tv_sec; - delta.tv_nsec = txc->time.tv_usec; - if (!(txc->modes & ADJ_NANO)) - delta.tv_nsec *= 1000; - ret = timekeeping_inject_offset(&delta); - if (ret) - return ret; - - offset_set = delta.tv_sec != 0; - audit_tk_injoffset(delta); - } - audit_ntp_init(&ad); ktime_get_real_ts64(&ts); @@ -2573,6 +2571,19 @@ int do_adjtimex(struct __kernel_timex *txc) struct timekeeper *tks = &tk_core.shadow_timekeeper; s32 orig_tai, tai; + if (txc->modes & ADJ_SETOFFSET) { + delta.tv_sec = txc->time.tv_sec; + delta.tv_nsec = txc->time.tv_usec; + if (!(txc->modes & ADJ_NANO)) + delta.tv_nsec *= 1000; + ret = __timekeeping_inject_offset(&delta); + if (ret) + return ret; + + offset_set = delta.tv_sec != 0; + clock_set = true; + } + orig_tai = tai = tks->tai_offset; ret = __do_adjtimex(txc, &ts, &tai, &ad); @@ -2583,13 +2594,16 @@ int do_adjtimex(struct __kernel_timex *txc) } else { tk_update_leap_state_all(&tk_core); } + + /* Update the multiplier immediately if frequency was set directly */ + if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) + clock_set |= __timekeeping_advance(TK_ADV_FREQ); } - audit_ntp_log(&ad); + if (txc->modes & ADJ_SETOFFSET) + audit_tk_injoffset(delta); - /* Update the multiplier immediately if frequency was set directly */ - if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) - clock_set |= timekeeping_advance(TK_ADV_FREQ); + audit_ntp_log(&ad); if (clock_set) clock_was_set(CLOCK_SET_WALL); -- cgit v1.2.3 From f12b45862c4dcb9c2937b83ed730e473b9a76cbf Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 19 May 2025 10:33:19 +0200 Subject: timekeeping: Introduce timekeeper ID As long as there is only a single timekeeper, there is no need to clarify which timekeeper is used. But with the upcoming reusage of the timekeeper infrastructure for auxiliary clock timekeepers, an ID is required to differentiate. Introduce an enum for timekeeper IDs, introduce a field in struct tk_data to store this timekeeper id and add also initialization. The id struct field is added at the end of the second cachline, as there is a 4 byte hole anyway. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083025.842476378@linutronix.de --- kernel/time/timekeeping.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fb1da87a92f1..f4692fc2ea6b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1663,10 +1663,11 @@ read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, *boot_offset = ns_to_timespec64(local_clock()); } -static __init void tkd_basic_setup(struct tk_data *tkd) +static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id) { raw_spin_lock_init(&tkd->lock); seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock); + tkd->timekeeper.id = tkd->shadow_timekeeper.id = tk_id; } /* @@ -1696,7 +1697,7 @@ void __init timekeeping_init(void) struct timekeeper *tks = &tk_core.shadow_timekeeper; struct clocksource *clock; - tkd_basic_setup(&tk_core); + tkd_basic_setup(&tk_core, TIMEKEEPER_CORE); read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); if (timespec64_valid_settod(&wall_time) && -- cgit v1.2.3 From 9094c72c3d81bf2416b7c79d12c8494ab8fbac20 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 19 May 2025 10:33:20 +0200 Subject: time: Introduce auxiliary POSIX clocks To support auxiliary timekeeping and the related user space interfaces, it's required to define a clock ID range for them. Reserve 8 auxiliary clock IDs after the regular timekeeping clock ID space. This is the maximum number of auxiliary clocks the kernel can support. The actual number of supported clocks depends obviously on the presence of related devices and might be constraint by the available VDSO space. Add the corresponding timekeeper IDs as well. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083025.905800695@linutronix.de --- kernel/time/Kconfig | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index b0b97a60aaa6..7c6a52f7836c 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -82,9 +82,9 @@ config CONTEXT_TRACKING_IDLE help Tracks idle state on behalf of RCU. -if GENERIC_CLOCKEVENTS menu "Timers subsystem" +if GENERIC_CLOCKEVENTS # Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independent of this. @@ -208,6 +208,17 @@ config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US interval and NTP's maximum frequency drift of 500 parts per million. If the clocksource is good enough for NTP, it is good enough for the clocksource watchdog! +endif + +config POSIX_AUX_CLOCKS + bool "Enable auxiliary POSIX clocks" + depends on POSIX_TIMERS + help + Auxiliary POSIX clocks are clocks which can be steered + independently of the core timekeeper, which controls the + MONOTONIC, REALTIME, BOOTTIME and TAI clocks. They are useful to + provide e.g. lockless time accessors to independent PTP clocks + and other clock domains, which are not correlated to the TAI/NTP + notion of time. endmenu -endif -- cgit v1.2.3 From 8515714b0f88a698a4c26f0f0ce7d43ad14dce16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:21 +0200 Subject: ntp: Add support for auxiliary timekeepers If auxiliary clocks are enabled, provide an array of NTP data so that the auxiliary timekeepers can be steered independently of the core timekeeper. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083025.969000914@linutronix.de --- kernel/time/ntp.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index b837d3d9d325..5b5a0f76866d 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "ntp_internal.h" #include "timekeeping_internal.h" @@ -86,14 +87,16 @@ struct ntp_data { #endif }; -static struct ntp_data tk_ntp_data = { - .tick_usec = USER_TICK_USEC, - .time_state = TIME_OK, - .time_status = STA_UNSYNC, - .time_constant = 2, - .time_maxerror = NTP_PHASE_LIMIT, - .time_esterror = NTP_PHASE_LIMIT, - .ntp_next_leap_sec = TIME64_MAX, +static struct ntp_data tk_ntp_data[TIMEKEEPERS_MAX] = { + [ 0 ... TIMEKEEPERS_MAX - 1 ] = { + .tick_usec = USER_TICK_USEC, + .time_state = TIME_OK, + .time_status = STA_UNSYNC, + .time_constant = 2, + .time_maxerror = NTP_PHASE_LIMIT, + .time_esterror = NTP_PHASE_LIMIT, + .ntp_next_leap_sec = TIME64_MAX, + }, }; #define SECS_PER_DAY 86400 @@ -351,13 +354,13 @@ static void __ntp_clear(struct ntp_data *ntpdata) */ void ntp_clear(void) { - __ntp_clear(&tk_ntp_data); + __ntp_clear(&tk_ntp_data[TIMEKEEPER_CORE]); } u64 ntp_tick_length(void) { - return tk_ntp_data.tick_length; + return tk_ntp_data[TIMEKEEPER_CORE].tick_length; } /** @@ -368,7 +371,7 @@ u64 ntp_tick_length(void) */ ktime_t ntp_get_next_leap(void) { - struct ntp_data *ntpdata = &tk_ntp_data; + struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; ktime_t ret; if ((ntpdata->time_state == TIME_INS) && (ntpdata->time_status & STA_INS)) @@ -389,7 +392,7 @@ ktime_t ntp_get_next_leap(void) */ int second_overflow(time64_t secs) { - struct ntp_data *ntpdata = &tk_ntp_data; + struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; s64 delta; int leap = 0; s32 rem; @@ -605,7 +608,7 @@ static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_ns */ static inline bool ntp_synced(void) { - return !(tk_ntp_data.time_status & STA_UNSYNC); + return !(tk_ntp_data[TIMEKEEPER_CORE].time_status & STA_UNSYNC); } /* @@ -762,7 +765,7 @@ static inline void process_adjtimex_modes(struct ntp_data *ntpdata, const struct int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai, struct audit_ntp_data *ad) { - struct ntp_data *ntpdata = &tk_ntp_data; + struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; int result; if (txc->modes & ADJ_ADJTIME) { @@ -1031,8 +1034,8 @@ static void hardpps_update_phase(struct ntp_data *ntpdata, long error) */ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) { + struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; struct pps_normtime pts_norm, freq_norm; - struct ntp_data *ntpdata = &tk_ntp_data; pts_norm = pps_normalize_ts(*phase_ts); @@ -1083,18 +1086,18 @@ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_t static int __init ntp_tick_adj_setup(char *str) { - int rc = kstrtos64(str, 0, &tk_ntp_data.ntp_tick_adj); + int rc = kstrtos64(str, 0, &tk_ntp_data[TIMEKEEPER_CORE].ntp_tick_adj); if (rc) return rc; - tk_ntp_data.ntp_tick_adj <<= NTP_SCALE_SHIFT; + tk_ntp_data[TIMEKEEPER_CORE].ntp_tick_adj <<= NTP_SCALE_SHIFT; return 1; } - __setup("ntp_tick_adj=", ntp_tick_adj_setup); void __init ntp_init(void) { - ntp_clear(); + for (int id = 0; id < TIMEKEEPERS_MAX; id++) + __ntp_clear(tk_ntp_data + id); ntp_init_cmos_sync(); } -- cgit v1.2.3 From 5ffa25f573cf524ff53660c5ff7a158ee10f23c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:22 +0200 Subject: ntp: Add timekeeper ID arguments to public functions In preparation for supporting auxiliary POSIX clocks, add a timekeeper ID to the relevant functions. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083026.032425931@linutronix.de --- kernel/time/ntp.c | 33 +++++++++++++++++++-------------- kernel/time/ntp_internal.h | 11 +++++------ kernel/time/timekeeping.c | 12 ++++++------ 3 files changed, 30 insertions(+), 26 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5b5a0f76866d..e28dc53194a7 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -351,33 +351,38 @@ static void __ntp_clear(struct ntp_data *ntpdata) /** * ntp_clear - Clears the NTP state variables + * @tkid: Timekeeper ID to be able to select proper ntp data array member */ -void ntp_clear(void) +void ntp_clear(unsigned int tkid) { - __ntp_clear(&tk_ntp_data[TIMEKEEPER_CORE]); + __ntp_clear(&tk_ntp_data[tkid]); } -u64 ntp_tick_length(void) +u64 ntp_tick_length(unsigned int tkid) { - return tk_ntp_data[TIMEKEEPER_CORE].tick_length; + return tk_ntp_data[tkid].tick_length; } /** * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t + * @tkid: Timekeeper ID * - * Provides the time of the next leapsecond against CLOCK_REALTIME in - * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending. + * Returns: For @tkid == TIMEKEEPER_CORE this provides the time of the next + * leap second against CLOCK_REALTIME in a ktime_t format if a + * leap second is pending. KTIME_MAX otherwise. */ -ktime_t ntp_get_next_leap(void) +ktime_t ntp_get_next_leap(unsigned int tkid) { struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; - ktime_t ret; + + if (tkid != TIMEKEEPER_CORE) + return KTIME_MAX; if ((ntpdata->time_state == TIME_INS) && (ntpdata->time_status & STA_INS)) return ktime_set(ntpdata->ntp_next_leap_sec, 0); - ret = KTIME_MAX; - return ret; + + return KTIME_MAX; } /* @@ -390,9 +395,9 @@ ktime_t ntp_get_next_leap(void) * * Also handles leap second processing, and returns leap offset */ -int second_overflow(time64_t secs) +int second_overflow(unsigned int tkid, time64_t secs) { - struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; + struct ntp_data *ntpdata = &tk_ntp_data[tkid]; s64 delta; int leap = 0; s32 rem; @@ -762,10 +767,10 @@ static inline void process_adjtimex_modes(struct ntp_data *ntpdata, const struct * adjtimex() mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ -int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, +int __do_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai, struct audit_ntp_data *ad) { - struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; + struct ntp_data *ntpdata = &tk_ntp_data[tkid]; int result; if (txc->modes & ADJ_ADJTIME) { diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 5a633dce9057..2d3e9669730b 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -3,13 +3,12 @@ #define _LINUX_NTP_INTERNAL_H extern void ntp_init(void); -extern void ntp_clear(void); +extern void ntp_clear(unsigned int tkid); /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ -extern u64 ntp_tick_length(void); -extern ktime_t ntp_get_next_leap(void); -extern int second_overflow(time64_t secs); -extern int __do_adjtimex(struct __kernel_timex *txc, - const struct timespec64 *ts, +extern u64 ntp_tick_length(unsigned int tkid); +extern ktime_t ntp_get_next_leap(unsigned int tkid); +extern int second_overflow(unsigned int tkid, time64_t secs); +extern int __do_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai, struct audit_ntp_data *ad); extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f4692fc2ea6b..e1b8e2618ca7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -601,7 +601,7 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); */ static inline void tk_update_leap_state(struct timekeeper *tk) { - tk->next_leap_ktime = ntp_get_next_leap(); + tk->next_leap_ktime = ntp_get_next_leap(tk->id); if (tk->next_leap_ktime != KTIME_MAX) /* Convert to monotonic time */ tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real); @@ -678,7 +678,7 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act if (action & TK_CLEAR_NTP) { tk->ntp_error = 0; - ntp_clear(); + ntp_clear(tk->id); } tk_update_leap_state(tk); @@ -2049,7 +2049,7 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, */ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { - u64 ntp_tl = ntp_tick_length(); + u64 ntp_tl = ntp_tick_length(tk->id); u32 mult; /* @@ -2130,7 +2130,7 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) } /* Figure out if its a leap sec and apply if needed */ - leap = second_overflow(tk->xtime_sec); + leap = second_overflow(tk->id, tk->xtime_sec); if (unlikely(leap)) { struct timespec64 ts; @@ -2227,7 +2227,7 @@ static bool __timekeeping_advance(enum timekeeping_adv_mode mode) shift = ilog2(offset) - ilog2(tk->cycle_interval); shift = max(0, shift); /* Bound shift to one less than what overflows tick_length */ - maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; + maxshift = (64 - (ilog2(ntp_tick_length(tk->id)) + 1)) - 1; shift = min(shift, maxshift); while (offset >= tk->cycle_interval) { offset = logarithmic_accumulation(tk, offset, shift, &clock_set); @@ -2586,7 +2586,7 @@ int do_adjtimex(struct __kernel_timex *txc) } orig_tai = tai = tks->tai_offset; - ret = __do_adjtimex(txc, &ts, &tai, &ad); + ret = __do_adjtimex(tks->id, txc, &ts, &tai, &ad); if (tai != orig_tai) { __timekeeping_set_tai_offset(tks, tai); -- cgit v1.2.3 From c7ebfbc440151ae4a66a03b0f879cbece45174c8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:23 +0200 Subject: ntp: Rename __do_adjtimex() to ntp_adjtimex() Clean up the name space. No functional change. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083026.095637820@linutronix.de --- kernel/time/ntp.c | 4 ++-- kernel/time/ntp_internal.h | 4 ++-- kernel/time/timekeeping.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index e28dc53194a7..9aba1bc7b2a7 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -767,8 +767,8 @@ static inline void process_adjtimex_modes(struct ntp_data *ntpdata, const struct * adjtimex() mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ -int __do_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts, - s32 *time_tai, struct audit_ntp_data *ad) +int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts, + s32 *time_tai, struct audit_ntp_data *ad) { struct ntp_data *ntpdata = &tk_ntp_data[tkid]; int result; diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 2d3e9669730b..7084d839c207 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -8,8 +8,8 @@ extern void ntp_clear(unsigned int tkid); extern u64 ntp_tick_length(unsigned int tkid); extern ktime_t ntp_get_next_leap(unsigned int tkid); extern int second_overflow(unsigned int tkid, time64_t secs); -extern int __do_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts, - s32 *time_tai, struct audit_ntp_data *ad); +extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts, + s32 *time_tai, struct audit_ntp_data *ad); extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts); #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e1b8e2618ca7..99b4749f0665 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2586,7 +2586,7 @@ int do_adjtimex(struct __kernel_timex *txc) } orig_tai = tai = tks->tai_offset; - ret = __do_adjtimex(tks->id, txc, &ts, &tai, &ad); + ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &ad); if (tai != orig_tai) { __timekeeping_set_tai_offset(tks, tai); -- cgit v1.2.3 From 926ad475169f5b24868438e4bff61ec6a73efd19 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 19 May 2025 10:33:25 +0200 Subject: timekeeping: Make __timekeeping_advance() reusable In __timekeeping_advance() the pointer to struct tk_data is hardcoded by the use of &tk_core. As long as there is only a single timekeeper (tk_core), this is not a problem. But when __timekeeping_advance() will be reused for per auxiliary timekeepers, __timekeeping_advance() needs to be generalized. Add a pointer to struct tk_data as function argument of __timekeeping_advance() and adapt all call sites. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083026.160967312@linutronix.de --- kernel/time/timekeeping.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 99b4749f0665..153f760dffb4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2196,10 +2196,10 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, * timekeeping_advance - Updates the timekeeper to the current time and * current NTP tick length */ -static bool __timekeeping_advance(enum timekeeping_adv_mode mode) +static bool __timekeeping_advance(struct tk_data *tkd, enum timekeeping_adv_mode mode) { - struct timekeeper *tk = &tk_core.shadow_timekeeper; - struct timekeeper *real_tk = &tk_core.timekeeper; + struct timekeeper *tk = &tkd->shadow_timekeeper; + struct timekeeper *real_tk = &tkd->timekeeper; unsigned int clock_set = 0; int shift = 0, maxshift; u64 offset, orig_offset; @@ -2252,7 +2252,7 @@ static bool __timekeeping_advance(enum timekeeping_adv_mode mode) if (orig_offset != offset) tk_update_coarse_nsecs(tk); - timekeeping_update_from_shadow(&tk_core, clock_set); + timekeeping_update_from_shadow(tkd, clock_set); return !!clock_set; } @@ -2260,7 +2260,7 @@ static bool __timekeeping_advance(enum timekeeping_adv_mode mode) static bool timekeeping_advance(enum timekeeping_adv_mode mode) { guard(raw_spinlock_irqsave)(&tk_core.lock); - return __timekeeping_advance(mode); + return __timekeeping_advance(&tk_core, mode); } /** @@ -2598,7 +2598,7 @@ int do_adjtimex(struct __kernel_timex *txc) /* Update the multiplier immediately if frequency was set directly */ if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) - clock_set |= __timekeeping_advance(TK_ADV_FREQ); + clock_set |= __timekeeping_advance(&tk_core, TK_ADV_FREQ); } if (txc->modes & ADJ_SETOFFSET) -- cgit v1.2.3 From 8c782acd3f47e21f9b03fd3720172d1f8e4fb796 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:26 +0200 Subject: timekeeping: Prepare timekeeping_update_from_shadow() Don't invoke the VDSO and paravirt updates when utilized for auxiliary clocks. This is a temporary workaround until the VDSO and paravirt interfaces have been worked out. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250519083026.223876435@linutronix.de --- kernel/time/timekeeping.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 153f760dffb4..e3c1a1c1d8c5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -683,13 +683,15 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act tk_update_leap_state(tk); tk_update_ktime_data(tk); + tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; - update_vsyscall(tk); - update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); + if (tk->id == TIMEKEEPER_CORE) { + update_vsyscall(tk); + update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); - tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; - update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); - update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); + update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); + update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); + } if (action & TK_CLOCK_WAS_SET) tk->clock_was_set_seq++; -- cgit v1.2.3 From 6168024604236cb2bb1004ea8459c8ece2c4ef5f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:27 +0200 Subject: timekeeping: Add clock_valid flag to timekeeper In preparation for supporting independent auxiliary timekeepers, add a clock valid field and set it to true for the system timekeeper. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083026.287145536@linutronix.de --- kernel/time/timekeeping.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e3c1a1c1d8c5..bf59bacc97db 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1665,11 +1665,12 @@ read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, *boot_offset = ns_to_timespec64(local_clock()); } -static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id) +static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id, bool valid) { raw_spin_lock_init(&tkd->lock); seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock); tkd->timekeeper.id = tkd->shadow_timekeeper.id = tk_id; + tkd->timekeeper.clock_valid = tkd->shadow_timekeeper.clock_valid = valid; } /* @@ -1699,7 +1700,7 @@ void __init timekeeping_init(void) struct timekeeper *tks = &tk_core.shadow_timekeeper; struct clocksource *clock; - tkd_basic_setup(&tk_core, TIMEKEEPER_CORE); + tkd_basic_setup(&tk_core, TIMEKEEPER_CORE, true); read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); if (timespec64_valid_settod(&wall_time) && -- cgit v1.2.3 From 22c62b9a84b8f16ca0277e133a0cd62a259fee7c Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 19 May 2025 10:33:28 +0200 Subject: timekeeping: Introduce auxiliary timekeepers Provide timekeepers for auxiliary clocks and initialize them during boot. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250519083026.350061049@linutronix.de --- kernel/time/timekeeping.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index bf59bacc97db..19f4af1a37ea 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -53,7 +53,11 @@ struct tk_data { raw_spinlock_t lock; } ____cacheline_aligned; -static struct tk_data tk_core; +static struct tk_data timekeeper_data[TIMEKEEPERS_MAX]; + +/* The core timekeeper */ +#define tk_core (timekeeper_data[TIMEKEEPER_CORE]) + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -113,6 +117,12 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = { .base[1] = FAST_TK_INIT, }; +#ifdef CONFIG_POSIX_AUX_CLOCKS +static __init void tk_aux_setup(void); +#else +static inline void tk_aux_setup(void) { } +#endif + unsigned long timekeeper_lock_irqsave(void) { unsigned long flags; @@ -1589,7 +1599,6 @@ void ktime_get_raw_ts64(struct timespec64 *ts) } EXPORT_SYMBOL(ktime_get_raw_ts64); - /** * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres */ @@ -1701,6 +1710,7 @@ void __init timekeeping_init(void) struct clocksource *clock; tkd_basic_setup(&tk_core, TIMEKEEPER_CORE, true); + tk_aux_setup(); read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); if (timespec64_valid_settod(&wall_time) && @@ -2630,3 +2640,11 @@ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) } EXPORT_SYMBOL(hardpps); #endif /* CONFIG_NTP_PPS */ + +#ifdef CONFIG_POSIX_AUX_CLOCKS +static __init void tk_aux_setup(void) +{ + for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) + tkd_basic_setup(&timekeeper_data[i], i, false); +} +#endif /* CONFIG_POSIX_AUX_CLOCKS */ -- cgit v1.2.3 From ffa0519baaed48ca953bd201e1b17f15dae21b2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:30 +0200 Subject: timekeeping: Provide ktime_get_ntp_seconds() ntp_adjtimex() requires access to the actual time keeper per timekeeper ID. Provide an interface. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250519083026.411809421@linutronix.de --- kernel/time/timekeeping.c | 9 +++++++++ kernel/time/timekeeping_internal.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 19f4af1a37ea..7d3693a72a01 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2627,6 +2627,15 @@ int do_adjtimex(struct __kernel_timex *txc) return ret; } +/* + * Invoked from NTP with the time keeper lock held, so lockless access is + * fine. + */ +long ktime_get_ntp_seconds(unsigned int id) +{ + return timekeeper_data[id].timekeeper.xtime_sec; +} + #ifdef CONFIG_NTP_PPS /** * hardpps() - Accessor function to NTP __hardpps function diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 8c9079108ffb..973ede670a36 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -45,4 +45,7 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta) unsigned long timekeeper_lock_irqsave(void); void timekeeper_unlock_irqrestore(unsigned long flags); +/* NTP specific interface to access the current seconds value */ +long ktime_get_ntp_seconds(unsigned int id); + #endif /* _TIMEKEEPING_INTERNAL_H */ -- cgit v1.2.3 From c85f5ab60820bde1510110e403d17456fbb8c266 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 19 May 2025 10:33:31 +0200 Subject: ntp: Use ktime_get_ntp_seconds() Use ktime_get_ntp_seconds() to prepare for auxiliary clocks so that the readout becomes per timekeeper. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250519083026.472512636@linutronix.de --- kernel/time/ntp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 9aba1bc7b2a7..97fa99b96dd0 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -303,7 +303,7 @@ static void ntp_update_offset(struct ntp_data *ntpdata, long offset) * Select how the frequency is to be controlled * and in which mode (PLL or FLL). */ - real_secs = __ktime_get_real_seconds(); + real_secs = ktime_get_ntp_seconds(ntpdata - tk_ntp_data); secs = (long)(real_secs - ntpdata->time_reftime); if (unlikely(ntpdata->time_status & STA_FREQHOLD)) secs = 0; @@ -710,7 +710,7 @@ static inline void process_adj_status(struct ntp_data *ntpdata, const struct __k * reference time to current time. */ if (!(ntpdata->time_status & STA_PLL) && (txc->status & STA_PLL)) - ntpdata->time_reftime = __ktime_get_real_seconds(); + ntpdata->time_reftime = ktime_get_ntp_seconds(ntpdata - tk_ntp_data); /* only set allowed bits */ ntpdata->time_status &= STA_RONLY; -- cgit v1.2.3 From 9f7729480a2c771bbe49b7eab034a8eaa5e27bfb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:29 +0200 Subject: timekeeping: Update auxiliary timekeepers on clocksource change Propagate a system clocksource change to the auxiliary timekeepers so that they can pick up the new clocksource. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183757.803890875@linutronix.de --- kernel/time/timekeeping.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7d3693a72a01..ee9757018341 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -119,8 +119,10 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = { #ifdef CONFIG_POSIX_AUX_CLOCKS static __init void tk_aux_setup(void); +static void tk_aux_update_clocksource(void); #else static inline void tk_aux_setup(void) { } +static inline void tk_aux_update_clocksource(void) { } #endif unsigned long timekeeper_lock_irqsave(void) @@ -1548,6 +1550,8 @@ static int change_clocksource(void *data) timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); } + tk_aux_update_clocksource(); + if (old) { if (old->disable) old->disable(old); @@ -2651,6 +2655,35 @@ EXPORT_SYMBOL(hardpps); #endif /* CONFIG_NTP_PPS */ #ifdef CONFIG_POSIX_AUX_CLOCKS + +/* + * Bitmap for the activated auxiliary timekeepers to allow lockless quick + * checks in the hot paths without touching extra cache lines. If set, then + * the state of the corresponding timekeeper has to be re-checked under + * timekeeper::lock. + */ +static unsigned long aux_timekeepers; + +/* Invoked from timekeeping after a clocksource change */ +static void tk_aux_update_clocksource(void) +{ + unsigned long active = READ_ONCE(aux_timekeepers); + unsigned int id; + + for_each_set_bit(id, &active, BITS_PER_LONG) { + struct tk_data *tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; + struct timekeeper *tks = &tkd->shadow_timekeeper; + + guard(raw_spinlock_irqsave)(&tkd->lock); + if (!tks->clock_valid) + continue; + + timekeeping_forward_now(tks); + tk_setup_internals(tks, tk_core.timekeeper.tkr_mono.clock); + timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); + } +} + static __init void tk_aux_setup(void) { for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) -- cgit v1.2.3 From 05bc6e6290f91d2d40086ab4ef52da21c14ec4b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:31 +0200 Subject: timekeeping: Provide time getters for auxiliary clocks Provide interfaces similar to the ktime_get*() family which provide access to the auxiliary clocks. These interfaces have a boolean return value, which indicates whether the accessed clock is valid or not. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183757.868342628@linutronix.de --- kernel/time/timekeeping.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ee9757018341..c7d2913e68c3 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2664,6 +2664,18 @@ EXPORT_SYMBOL(hardpps); */ static unsigned long aux_timekeepers; +static inline unsigned int clockid_to_tkid(unsigned int id) +{ + return TIMEKEEPER_AUX_FIRST + id - CLOCK_AUX; +} + +static inline struct tk_data *aux_get_tk_data(clockid_t id) +{ + if (!clockid_aux_valid(id)) + return NULL; + return &timekeeper_data[clockid_to_tkid(id)]; +} + /* Invoked from timekeeping after a clocksource change */ static void tk_aux_update_clocksource(void) { @@ -2684,6 +2696,59 @@ static void tk_aux_update_clocksource(void) } } +/** + * ktime_get_aux - Get time for a AUX clock + * @id: ID of the clock to read (CLOCK_AUX...) + * @kt: Pointer to ktime_t to store the time stamp + * + * Returns: True if the timestamp is valid, false otherwise + */ +bool ktime_get_aux(clockid_t id, ktime_t *kt) +{ + struct tk_data *aux_tkd = aux_get_tk_data(id); + struct timekeeper *aux_tk; + unsigned int seq; + ktime_t base; + u64 nsecs; + + WARN_ON(timekeeping_suspended); + + if (!aux_tkd) + return false; + + aux_tk = &aux_tkd->timekeeper; + do { + seq = read_seqcount_begin(&aux_tkd->seq); + if (!aux_tk->clock_valid) + return false; + + base = ktime_add(aux_tk->tkr_mono.base, aux_tk->offs_aux); + nsecs = timekeeping_get_ns(&aux_tk->tkr_mono); + } while (read_seqcount_retry(&aux_tkd->seq, seq)); + + *kt = ktime_add_ns(base, nsecs); + return true; +} +EXPORT_SYMBOL_GPL(ktime_get_aux); + +/** + * ktime_get_aux_ts64 - Get time for a AUX clock + * @id: ID of the clock to read (CLOCK_AUX...) + * @ts: Pointer to timespec64 to store the time stamp + * + * Returns: True if the timestamp is valid, false otherwise + */ +bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *ts) +{ + ktime_t now; + + if (!ktime_get_aux(id, &now)) + return false; + *ts = ktime_to_timespec64(now); + return true; +} +EXPORT_SYMBOL_GPL(ktime_get_aux_ts64); + static __init void tk_aux_setup(void) { for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) -- cgit v1.2.3 From 606424bf4ffd9d27865c45b5707c1edac6b187ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:32 +0200 Subject: timekeeping: Add minimal posix-timers support for auxiliary clocks Provide clock_getres(2) and clock_gettime(2) for auxiliary clocks. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183757.932220594@linutronix.de --- kernel/time/posix-timers.c | 3 +++ kernel/time/posix-timers.h | 1 + kernel/time/timekeeping.c | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2053b1a4c9e4..8b582174b1f9 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1526,6 +1526,9 @@ static const struct k_clock * const posix_clocks[] = { [CLOCK_REALTIME_ALARM] = &alarm_clock, [CLOCK_BOOTTIME_ALARM] = &alarm_clock, [CLOCK_TAI] = &clock_tai, +#ifdef CONFIG_POSIX_AUX_CLOCKS + [CLOCK_AUX ... CLOCK_AUX_LAST] = &clock_aux, +#endif }; static const struct k_clock *clockid_to_kclock(const clockid_t id) diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index 61906f0688c1..7f259e845d24 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -41,6 +41,7 @@ extern const struct k_clock clock_posix_dynamic; extern const struct k_clock clock_process; extern const struct k_clock clock_thread; extern const struct k_clock alarm_clock; +extern const struct k_clock clock_aux; void posix_timer_queue_signal(struct k_itimer *timr); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c7d2913e68c3..10c6e37dc0dc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2655,6 +2655,7 @@ EXPORT_SYMBOL(hardpps); #endif /* CONFIG_NTP_PPS */ #ifdef CONFIG_POSIX_AUX_CLOCKS +#include "posix-timers.h" /* * Bitmap for the activated auxiliary timekeepers to allow lockless quick @@ -2749,6 +2750,26 @@ bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *ts) } EXPORT_SYMBOL_GPL(ktime_get_aux_ts64); +static int aux_get_res(clockid_t id, struct timespec64 *tp) +{ + if (!clockid_aux_valid(id)) + return -ENODEV; + + tp->tv_sec = 0; + tp->tv_nsec = 1; + return 0; +} + +static int aux_get_timespec(clockid_t id, struct timespec64 *tp) +{ + return ktime_get_aux_ts64(id, tp) ? 0 : -ENODEV; +} + +const struct k_clock clock_aux = { + .clock_getres = aux_get_res, + .clock_get_timespec = aux_get_timespec, +}; + static __init void tk_aux_setup(void) { for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) -- cgit v1.2.3 From 60ecc26ec5af567a55f362ad92c0cac8b894541c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:34 +0200 Subject: timekeeping: Provide time setter for auxiliary clocks Add clock_settime(2) support for auxiliary clocks. The function affects the AUX offset which is added to the "monotonic" clock readout of these clocks. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183757.995688714@linutronix.de --- kernel/time/timekeeping.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 10c6e37dc0dc..b6ac7847bc0a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2765,9 +2765,53 @@ static int aux_get_timespec(clockid_t id, struct timespec64 *tp) return ktime_get_aux_ts64(id, tp) ? 0 : -ENODEV; } +static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew) +{ + struct tk_data *aux_tkd = aux_get_tk_data(id); + struct timekeeper *aux_tks; + ktime_t tnow, nsecs; + + if (!timespec64_valid_settod(tnew)) + return -EINVAL; + if (!aux_tkd) + return -ENODEV; + + aux_tks = &aux_tkd->shadow_timekeeper; + + guard(raw_spinlock_irq)(&aux_tkd->lock); + if (!aux_tks->clock_valid) + return -ENODEV; + + /* Forward the timekeeper base time */ + timekeeping_forward_now(aux_tks); + /* + * Get the updated base time. tkr_mono.base has not been + * updated yet, so do that first. That makes the update + * in timekeeping_update_from_shadow() redundant, but + * that's harmless. After that @tnow can be calculated + * by using tkr_mono::cycle_last, which has been set + * by timekeeping_forward_now(). + */ + tk_update_ktime_data(aux_tks); + nsecs = timekeeping_cycles_to_ns(&aux_tks->tkr_mono, aux_tks->tkr_mono.cycle_last); + tnow = ktime_add(aux_tks->tkr_mono.base, nsecs); + + /* + * Calculate the new AUX offset as delta to @tnow ("monotonic"). + * That avoids all the tk::xtime back and forth conversions as + * xtime ("realtime") is not applicable for auxiliary clocks and + * kept in sync with "monotonic". + */ + aux_tks->offs_aux = ktime_sub(timespec64_to_ktime(*tnew), tnow); + + timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); + return 0; +} + const struct k_clock clock_aux = { .clock_getres = aux_get_res, .clock_get_timespec = aux_get_timespec, + .clock_set = aux_clock_set, }; static __init void tk_aux_setup(void) -- cgit v1.2.3 From e8db3a55798d70f2c222c6103990776fca6a6ebc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:40 +0200 Subject: timekeeping: Make timekeeping_inject_offset() reusable Split out the inner workings for auxiliary clock support and feed the core time keeper into it. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183758.059934561@linutronix.de --- kernel/time/timekeeping.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b6ac7847bc0a..2d294cfe185e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1433,32 +1433,32 @@ EXPORT_SYMBOL(do_settimeofday64); /** * __timekeeping_inject_offset - Adds or subtracts from the current time. + * @tkd: Pointer to the timekeeper to modify * @ts: Pointer to the timespec variable containing the offset * * Adds or subtracts an offset value from the current time. */ -static int __timekeeping_inject_offset(const struct timespec64 *ts) +static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespec64 *ts) { - struct timekeeper *tks = &tk_core.shadow_timekeeper; + struct timekeeper *tks = &tkd->shadow_timekeeper; struct timespec64 tmp; if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - timekeeping_forward_now(tks); /* Make sure the proposed value is valid */ tmp = timespec64_add(tk_xtime(tks), *ts); if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || !timespec64_valid_settod(&tmp)) { - timekeeping_restore_shadow(&tk_core); + timekeeping_restore_shadow(tkd); return -EINVAL; } tk_xtime_add(tks, ts); tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); - timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); + timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); return 0; } @@ -1467,7 +1467,7 @@ static int timekeeping_inject_offset(const struct timespec64 *ts) int ret; scoped_guard (raw_spinlock_irqsave, &tk_core.lock) - ret = __timekeeping_inject_offset(ts); + ret = __timekeeping_inject_offset(&tk_core, ts); /* Signal hrtimers about time change */ if (!ret) @@ -2568,6 +2568,7 @@ EXPORT_SYMBOL_GPL(random_get_entropy_fallback); */ int do_adjtimex(struct __kernel_timex *txc) { + struct tk_data *tkd = &tk_core; struct timespec64 delta, ts; struct audit_ntp_data ad; bool offset_set = false; @@ -2585,16 +2586,19 @@ int do_adjtimex(struct __kernel_timex *txc) ktime_get_real_ts64(&ts); add_device_randomness(&ts, sizeof(ts)); - scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { - struct timekeeper *tks = &tk_core.shadow_timekeeper; + scoped_guard (raw_spinlock_irqsave, &tkd->lock) { + struct timekeeper *tks = &tkd->shadow_timekeeper; s32 orig_tai, tai; + if (!tks->clock_valid) + return -ENODEV; + if (txc->modes & ADJ_SETOFFSET) { delta.tv_sec = txc->time.tv_sec; delta.tv_nsec = txc->time.tv_usec; if (!(txc->modes & ADJ_NANO)) delta.tv_nsec *= 1000; - ret = __timekeeping_inject_offset(&delta); + ret = __timekeeping_inject_offset(tkd, &delta); if (ret) return ret; @@ -2607,7 +2611,7 @@ int do_adjtimex(struct __kernel_timex *txc) if (tai != orig_tai) { __timekeeping_set_tai_offset(tks, tai); - timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); + timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET); clock_set = true; } else { tk_update_leap_state_all(&tk_core); @@ -2615,7 +2619,7 @@ int do_adjtimex(struct __kernel_timex *txc) /* Update the multiplier immediately if frequency was set directly */ if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) - clock_set |= __timekeeping_advance(&tk_core, TK_ADV_FREQ); + clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ); } if (txc->modes & ADJ_SETOFFSET) -- cgit v1.2.3 From 2c8aea59c206b12b436373861590baeda728be12 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:42 +0200 Subject: timekeeping: Add auxiliary clock support to __timekeeping_inject_offset() Redirect the relative offset adjustment to the auxiliary clock offset instead of modifying CLOCK_REALTIME, which has no meaning in context of these clocks. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183758.124057787@linutronix.de --- kernel/time/timekeeping.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2d294cfe185e..e893557cd53f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1431,6 +1431,11 @@ int do_settimeofday64(const struct timespec64 *ts) } EXPORT_SYMBOL(do_settimeofday64); +static inline bool timekeeper_is_core_tk(struct timekeeper *tk) +{ + return !IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) || tk->id == TIMEKEEPER_CORE; +} + /** * __timekeeping_inject_offset - Adds or subtracts from the current time. * @tkd: Pointer to the timekeeper to modify @@ -1448,16 +1453,34 @@ static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespe timekeeping_forward_now(tks); - /* Make sure the proposed value is valid */ - tmp = timespec64_add(tk_xtime(tks), *ts); - if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || - !timespec64_valid_settod(&tmp)) { - timekeeping_restore_shadow(tkd); - return -EINVAL; + if (timekeeper_is_core_tk(tks)) { + /* Make sure the proposed value is valid */ + tmp = timespec64_add(tk_xtime(tks), *ts); + if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || + !timespec64_valid_settod(&tmp)) { + timekeeping_restore_shadow(tkd); + return -EINVAL; + } + + tk_xtime_add(tks, ts); + tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); + } else { + struct tk_read_base *tkr_mono = &tks->tkr_mono; + ktime_t now, offs; + + /* Get the current time */ + now = ktime_add_ns(tkr_mono->base, timekeeping_get_ns(tkr_mono)); + /* Add the relative offset change */ + offs = ktime_add(tks->offs_aux, timespec64_to_ktime(*ts)); + + /* Prevent that the resulting time becomes negative */ + if (ktime_add(now, offs) < 0) { + timekeeping_restore_shadow(tkd); + return -EINVAL; + } + tks->offs_aux = offs; } - tk_xtime_add(tks, ts); - tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); return 0; } -- cgit v1.2.3 From 775f71ebedd382da390dc16a4c28cffa5b937f79 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:43 +0200 Subject: timekeeping: Make do_adjtimex() reusable Split out the actual functionality of adjtimex() and make do_adjtimex() a wrapper which feeds the core timekeeper into it and handles the result including audit at the call site. This allows to reuse the actual functionality for auxiliary clocks. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183758.187322876@linutronix.de --- kernel/time/timekeeping.c | 102 +++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 46 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e893557cd53f..0de61315b40c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2585,17 +2585,18 @@ unsigned long random_get_entropy_fallback(void) } EXPORT_SYMBOL_GPL(random_get_entropy_fallback); -/** - * do_adjtimex() - Accessor function to NTP __do_adjtimex function - * @txc: Pointer to kernel_timex structure containing NTP parameters - */ -int do_adjtimex(struct __kernel_timex *txc) +struct adjtimex_result { + struct audit_ntp_data ad; + struct timespec64 delta; + bool clock_set; +}; + +static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc, + struct adjtimex_result *result) { - struct tk_data *tkd = &tk_core; - struct timespec64 delta, ts; - struct audit_ntp_data ad; - bool offset_set = false; - bool clock_set = false; + struct timekeeper *tks = &tkd->shadow_timekeeper; + struct timespec64 ts; + s32 orig_tai, tai; int ret; /* Validate the data before disabling interrupts */ @@ -2604,56 +2605,65 @@ int do_adjtimex(struct __kernel_timex *txc) return ret; add_device_randomness(txc, sizeof(*txc)); - audit_ntp_init(&ad); - ktime_get_real_ts64(&ts); add_device_randomness(&ts, sizeof(ts)); - scoped_guard (raw_spinlock_irqsave, &tkd->lock) { - struct timekeeper *tks = &tkd->shadow_timekeeper; - s32 orig_tai, tai; + guard(raw_spinlock_irqsave)(&tkd->lock); - if (!tks->clock_valid) - return -ENODEV; - - if (txc->modes & ADJ_SETOFFSET) { - delta.tv_sec = txc->time.tv_sec; - delta.tv_nsec = txc->time.tv_usec; - if (!(txc->modes & ADJ_NANO)) - delta.tv_nsec *= 1000; - ret = __timekeeping_inject_offset(tkd, &delta); - if (ret) - return ret; - - offset_set = delta.tv_sec != 0; - clock_set = true; - } + if (!tks->clock_valid) + return -ENODEV; - orig_tai = tai = tks->tai_offset; - ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &ad); + if (txc->modes & ADJ_SETOFFSET) { + result->delta.tv_sec = txc->time.tv_sec; + result->delta.tv_nsec = txc->time.tv_usec; + if (!(txc->modes & ADJ_NANO)) + result->delta.tv_nsec *= 1000; + ret = __timekeeping_inject_offset(tkd, &result->delta); + if (ret) + return ret; + result->clock_set = true; + } - if (tai != orig_tai) { - __timekeeping_set_tai_offset(tks, tai); - timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET); - clock_set = true; - } else { - tk_update_leap_state_all(&tk_core); - } + orig_tai = tai = tks->tai_offset; + ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &result->ad); - /* Update the multiplier immediately if frequency was set directly */ - if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) - clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ); + if (tai != orig_tai) { + __timekeeping_set_tai_offset(tks, tai); + timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET); + result->clock_set = true; + } else { + tk_update_leap_state_all(&tk_core); } + /* Update the multiplier immediately if frequency was set directly */ + if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) + result->clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ); + + return ret; +} + +/** + * do_adjtimex() - Accessor function to NTP __do_adjtimex function + * @txc: Pointer to kernel_timex structure containing NTP parameters + */ +int do_adjtimex(struct __kernel_timex *txc) +{ + struct adjtimex_result result = { }; + int ret; + + ret = __do_adjtimex(&tk_core, txc, &result); + if (ret < 0) + return ret; + if (txc->modes & ADJ_SETOFFSET) - audit_tk_injoffset(delta); + audit_tk_injoffset(result.delta); - audit_ntp_log(&ad); + audit_ntp_log(&result.ad); - if (clock_set) + if (result.clock_set) clock_was_set(CLOCK_SET_WALL); - ntp_notify_cmos_timer(offset_set); + ntp_notify_cmos_timer(result.delta.tv_sec != 0); return ret; } -- cgit v1.2.3 From 4eca49d0b621b314ac7c80f363932ec6f6c8abc8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:45 +0200 Subject: timekeeping: Prepare do_adtimex() for auxiliary clocks Exclude ADJ_TAI, leap seconds and PPS functionality as they make no sense in the context of auxiliary clocks and provide a time stamp based on the actual clock. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183758.253203783@linutronix.de --- kernel/time/timekeeping.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 0de61315b40c..6770544f8c0e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -58,6 +58,17 @@ static struct tk_data timekeeper_data[TIMEKEEPERS_MAX]; /* The core timekeeper */ #define tk_core (timekeeper_data[TIMEKEEPER_CORE]) +#ifdef CONFIG_POSIX_AUX_CLOCKS +static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) +{ + return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts); +} +#else +static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) +{ + return false; +} +#endif /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -2508,7 +2519,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, /* * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex */ -static int timekeeping_validate_timex(const struct __kernel_timex *txc) +static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux_clock) { if (txc->modes & ADJ_ADJTIME) { /* singleshot must not be used with any other mode bits */ @@ -2567,6 +2578,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc) return -EINVAL; } + if (aux_clock) { + /* Auxiliary clocks are similar to TAI and do not have leap seconds */ + if (txc->status & (STA_INS | STA_DEL)) + return -EINVAL; + + /* No TAI offset setting */ + if (txc->modes & ADJ_TAI) + return -EINVAL; + + /* No PPS support either */ + if (txc->status & (STA_PPSFREQ | STA_PPSTIME)) + return -EINVAL; + } + return 0; } @@ -2595,17 +2620,22 @@ static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc, struct adjtimex_result *result) { struct timekeeper *tks = &tkd->shadow_timekeeper; + bool aux_clock = !timekeeper_is_core_tk(tks); struct timespec64 ts; s32 orig_tai, tai; int ret; /* Validate the data before disabling interrupts */ - ret = timekeeping_validate_timex(txc); + ret = timekeeping_validate_timex(txc, aux_clock); if (ret) return ret; add_device_randomness(txc, sizeof(*txc)); - ktime_get_real_ts64(&ts); + if (!aux_clock) + ktime_get_real_ts64(&ts); + else + tk_get_aux_ts64(tkd->timekeeper.id, &ts); + add_device_randomness(&ts, sizeof(ts)); guard(raw_spinlock_irqsave)(&tkd->lock); -- cgit v1.2.3 From ecf3e70304911be1c14cd21baa0bc611a53ec50b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:46 +0200 Subject: timekeeping: Provide adjtimex() for auxiliary clocks The behaviour is close to clock_adtime(CLOCK_REALTIME) with the following differences: 1) ADJ_SETOFFSET adjusts the auxiliary clock offset 2) ADJ_TAI is not supported 3) Leap seconds are not supported 4) PPS is not supported Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183758.317946543@linutronix.de --- kernel/time/timekeeping.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6770544f8c0e..523670ec0d2e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2875,10 +2875,26 @@ static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew) return 0; } +static int aux_clock_adj(const clockid_t id, struct __kernel_timex *txc) +{ + struct tk_data *aux_tkd = aux_get_tk_data(id); + struct adjtimex_result result = { }; + + if (!aux_tkd) + return -ENODEV; + + /* + * @result is ignored for now as there are neither hrtimers nor a + * RTC related to auxiliary clocks for now. + */ + return __do_adjtimex(aux_tkd, txc, &result); +} + const struct k_clock clock_aux = { .clock_getres = aux_get_res, .clock_get_timespec = aux_get_timespec, .clock_set = aux_clock_set, + .clock_adj = aux_clock_adj, }; static __init void tk_aux_setup(void) -- cgit v1.2.3 From e6d4c00719a6b1dda3fb358b4c973595f9dfd455 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:47 +0200 Subject: timekeeping: Provide update for auxiliary timekeepers Update the auxiliary timekeepers periodically. For now this is tied to the system timekeeper update from the tick. This might be revisited and moved out of the tick. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183758.382451331@linutronix.de --- kernel/time/timekeeping.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 523670ec0d2e..568ba1ffba0b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -131,9 +131,11 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = { #ifdef CONFIG_POSIX_AUX_CLOCKS static __init void tk_aux_setup(void); static void tk_aux_update_clocksource(void); +static void tk_aux_advance(void); #else static inline void tk_aux_setup(void) { } static inline void tk_aux_update_clocksource(void) { } +static inline void tk_aux_advance(void) { } #endif unsigned long timekeeper_lock_irqsave(void) @@ -2317,11 +2319,13 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) /** * update_wall_time - Uses the current clocksource to increment the wall time * + * It also updates the enabled auxiliary clock timekeepers */ void update_wall_time(void) { if (timekeeping_advance(TK_ADV_TICK)) clock_was_set_delayed(); + tk_aux_advance(); } /** @@ -2764,6 +2768,21 @@ static void tk_aux_update_clocksource(void) } } +static void tk_aux_advance(void) +{ + unsigned long active = READ_ONCE(aux_timekeepers); + unsigned int id; + + /* Lockless quick check to avoid extra cache lines */ + for_each_set_bit(id, &active, BITS_PER_LONG) { + struct tk_data *aux_tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; + + guard(raw_spinlock)(&aux_tkd->lock); + if (aux_tkd->shadow_timekeeper.clock_valid) + __timekeeping_advance(aux_tkd, TK_ADV_TICK); + } +} + /** * ktime_get_aux - Get time for a AUX clock * @id: ID of the clock to read (CLOCK_AUX...) -- cgit v1.2.3 From 7b95663a3d96b39b40f169dba5faef3e20163c5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Jun 2025 20:38:49 +0200 Subject: timekeeping: Provide interface to control auxiliary clocks Auxiliary clocks are disabled by default and attempts to access them fail. Provide an interface to enable/disable them at run-time. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20250625183758.444626478@linutronix.de --- kernel/time/timekeeping.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 568ba1ffba0b..6a61887eb87e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -2916,6 +2917,121 @@ const struct k_clock clock_aux = { .clock_adj = aux_clock_adj, }; +static void aux_clock_enable(clockid_t id) +{ + struct tk_read_base *tkr_raw = &tk_core.timekeeper.tkr_raw; + struct tk_data *aux_tkd = aux_get_tk_data(id); + struct timekeeper *aux_tks = &aux_tkd->shadow_timekeeper; + + /* Prevent the core timekeeper from changing. */ + guard(raw_spinlock_irq)(&tk_core.lock); + + /* + * Setup the auxiliary clock assuming that the raw core timekeeper + * clock frequency conversion is close enough. Userspace has to + * adjust for the deviation via clock_adjtime(2). + */ + guard(raw_spinlock_nested)(&aux_tkd->lock); + + /* Remove leftovers of a previous registration */ + memset(aux_tks, 0, sizeof(*aux_tks)); + /* Restore the timekeeper id */ + aux_tks->id = aux_tkd->timekeeper.id; + /* Setup the timekeeper based on the current system clocksource */ + tk_setup_internals(aux_tks, tkr_raw->clock); + + /* Mark it valid and set it live */ + aux_tks->clock_valid = true; + timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); +} + +static void aux_clock_disable(clockid_t id) +{ + struct tk_data *aux_tkd = aux_get_tk_data(id); + + guard(raw_spinlock_irq)(&aux_tkd->lock); + aux_tkd->shadow_timekeeper.clock_valid = false; + timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); +} + +static DEFINE_MUTEX(aux_clock_mutex); + +static ssize_t aux_clock_enable_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + /* Lazy atoi() as name is "0..7" */ + int id = kobj->name[0] & 0x7; + bool enable; + + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + if (kstrtobool(buf, &enable) < 0) + return -EINVAL; + + guard(mutex)(&aux_clock_mutex); + if (enable == test_bit(id, &aux_timekeepers)) + return count; + + if (enable) { + aux_clock_enable(CLOCK_AUX + id); + set_bit(id, &aux_timekeepers); + } else { + aux_clock_disable(CLOCK_AUX + id); + clear_bit(id, &aux_timekeepers); + } + return count; +} + +static ssize_t aux_clock_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + unsigned long active = READ_ONCE(aux_timekeepers); + /* Lazy atoi() as name is "0..7" */ + int id = kobj->name[0] & 0x7; + + return sysfs_emit(buf, "%d\n", test_bit(id, &active)); +} + +static struct kobj_attribute aux_clock_enable_attr = __ATTR_RW(aux_clock_enable); + +static struct attribute *aux_clock_enable_attrs[] = { + &aux_clock_enable_attr.attr, + NULL +}; + +static const struct attribute_group aux_clock_enable_attr_group = { + .attrs = aux_clock_enable_attrs, +}; + +static int __init tk_aux_sysfs_init(void) +{ + struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj); + + if (!tko) + return -ENOMEM; + + auxo = kobject_create_and_add("aux_clocks", tko); + if (!auxo) { + kobject_put(tko); + return -ENOMEM; + } + + for (int i = 0; i <= MAX_AUX_CLOCKS; i++) { + char id[2] = { [0] = '0' + i, }; + struct kobject *clk = kobject_create_and_add(id, auxo); + + if (!clk) + return -ENOMEM; + + int ret = sysfs_create_group(clk, &aux_clock_enable_attr_group); + + if (ret) + return ret; + } + return 0; +} +late_initcall(tk_aux_sysfs_init); + static __init void tk_aux_setup(void) { for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) -- cgit v1.2.3 From 5b605dbee07dda8fd538af1f07cbf1baf0a49cbc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Jul 2025 15:26:58 +0200 Subject: timekeeping: Provide ktime_get_clock_ts64() PTP implements an inline switch case for taking timestamps from various POSIX clock IDs, which already consumes quite some text space. Expanding it for auxiliary clocks really becomes too big for inlining. Provide a out of line version. The function invalidates the timestamp in case the clock is invalid. The invalidation allows to implement a validation check without the need to propagate a return value through deep existing call chains. Due to merge logistics this temporarily defines CLOCK_AUX[_LAST] if undefined, so that the plain branch, which does not contain any of the core timekeeper changes, can be pulled into the networking tree as prerequisite for the PTP side changes. These temporary defines are removed after that branch is merged into the tip::timers/ptp branch. That way the result in -next or upstream in the next merge window has zero dependencies. Signed-off-by: Thomas Gleixner Reviewed-by: Vadim Fedorenko Acked-by: John Stultz Link: https://lore.kernel.org/all/20250701132628.357686408@linutronix.de --- kernel/time/timekeeping.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a009c91f7b05..572e3bd0cc94 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1573,6 +1573,39 @@ void ktime_get_raw_ts64(struct timespec64 *ts) } EXPORT_SYMBOL(ktime_get_raw_ts64); +/** + * ktime_get_clock_ts64 - Returns time of a clock in a timespec + * @id: POSIX clock ID of the clock to read + * @ts: Pointer to the timespec64 to be set + * + * The timestamp is invalidated (@ts->sec is set to -1) if the + * clock @id is not available. + */ +void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts) +{ + /* Invalidate time stamp */ + ts->tv_sec = -1; + ts->tv_nsec = 0; + + switch (id) { + case CLOCK_REALTIME: + ktime_get_real_ts64(ts); + return; + case CLOCK_MONOTONIC: + ktime_get_ts64(ts); + return; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(ts); + return; + case CLOCK_AUX ... CLOCK_AUX_LAST: + if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) + ktime_get_aux_ts64(id, ts); + return; + default: + WARN_ON_ONCE(1); + } +} +EXPORT_SYMBOL_GPL(ktime_get_clock_ts64); /** * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres -- cgit v1.2.3 From 6fedaf682a5e1866efdaddc70ff0ada329825d53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:57:56 +0200 Subject: vdso/vsyscall: Introduce a helper to fill clock configurations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic to configure a 'struct vdso_clock' from a 'struct tk_read_base' is copied two times. Split it into a shared function to reduce the duplication, especially as another user will be added for auxiliary clocks. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-2-df7d9f87b9b8@linutronix.de --- kernel/time/vsyscall.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 32ef27c71b57..d655df259733 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -15,26 +15,25 @@ #include "timekeeping_internal.h" +static inline void fill_clock_configuration(struct vdso_clock *vc, const struct tk_read_base *base) +{ + vc->cycle_last = base->cycle_last; +#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT + vc->max_cycles = base->clock->max_cycles; +#endif + vc->mask = base->mask; + vc->mult = base->mult; + vc->shift = base->shift; +} + static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct timekeeper *tk) { struct vdso_clock *vc = vdata->clock_data; struct vdso_timestamp *vdso_ts; u64 nsec, sec; - vc[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; -#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT - vc[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles; -#endif - vc[CS_HRES_COARSE].mask = tk->tkr_mono.mask; - vc[CS_HRES_COARSE].mult = tk->tkr_mono.mult; - vc[CS_HRES_COARSE].shift = tk->tkr_mono.shift; - vc[CS_RAW].cycle_last = tk->tkr_raw.cycle_last; -#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT - vc[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles; -#endif - vc[CS_RAW].mask = tk->tkr_raw.mask; - vc[CS_RAW].mult = tk->tkr_raw.mult; - vc[CS_RAW].shift = tk->tkr_raw.shift; + fill_clock_configuration(&vc[CS_HRES_COARSE], &tk->tkr_mono); + fill_clock_configuration(&vc[CS_RAW], &tk->tkr_raw); /* CLOCK_MONOTONIC */ vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; -- cgit v1.2.3 From 76164ca0d113e6a9f3033f948c739586fc606ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:57:57 +0200 Subject: vdso/vsyscall: Split up __arch_update_vsyscall() into __arch_update_vdso_clock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The upcoming auxiliary clocks need this hook, too. To separate the architecture hooks from the timekeeper internals, refactor the hook to only operate on a single vDSO clock. While at it, use a more robust #define for the hook override. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-3-df7d9f87b9b8@linutronix.de --- kernel/time/vsyscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index d655df259733..df6bada2d58e 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -118,7 +118,8 @@ void update_vsyscall(struct timekeeper *tk) if (clock_mode != VDSO_CLOCKMODE_NONE) update_vdso_time_data(vdata, tk); - __arch_update_vsyscall(vdata); + __arch_update_vdso_clock(&vc[CS_HRES_COARSE]); + __arch_update_vdso_clock(&vc[CS_RAW]); vdso_write_end(vdata); -- cgit v1.2.3 From 9b7fc3f14576c268f62fe0b882fac5e61239b659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:58:04 +0200 Subject: vdso: Introduce aux_clock_resolution_ns() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the constant resolution to a shared header, so the vDSO can use it and return it without going through a syscall. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-10-df7d9f87b9b8@linutronix.de --- kernel/time/timekeeping.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c6fe89bded02..cbcf090bb4be 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -26,6 +26,8 @@ #include #include +#include + #include "tick-internal.h" #include "ntp_internal.h" #include "timekeeping_internal.h" @@ -2876,8 +2878,8 @@ static int aux_get_res(clockid_t id, struct timespec64 *tp) if (!clockid_aux_valid(id)) return -ENODEV; - tp->tv_sec = 0; - tp->tv_nsec = 1; + tp->tv_sec = aux_clock_resolution_ns() / NSEC_PER_SEC; + tp->tv_nsec = aux_clock_resolution_ns() % NSEC_PER_SEC; return 0; } -- cgit v1.2.3 From 380b84e168e57c54d0a9e053a5558fddc43f0c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 1 Jul 2025 10:58:05 +0200 Subject: vdso/vsyscall: Update auxiliary clock data in the datapage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the auxiliary clock data so it can be read from the vDSO. Architectures not using the generic vDSO time framework, namely SPARC64, are not supported. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250701-vdso-auxclock-v1-11-df7d9f87b9b8@linutronix.de --- kernel/time/namespace.c | 5 +++++ kernel/time/timekeeping.c | 12 ++++++++++++ kernel/time/vsyscall.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index e3642278df43..667452768ed3 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -242,6 +242,11 @@ static void timens_set_vvar_page(struct task_struct *task, for (i = 0; i < CS_BASES; i++) timens_setup_vdso_clock_data(&vc[i], ns); + if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) { + for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++) + timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns); + } + out: mutex_unlock(&offset_lock); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cbcf090bb4be..243fe25e680a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -66,11 +66,21 @@ static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) { return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts); } + +static inline bool tk_is_aux(const struct timekeeper *tk) +{ + return tk->id >= TIMEKEEPER_AUX_FIRST && tk->id <= TIMEKEEPER_AUX_LAST; +} #else static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) { return false; } + +static inline bool tk_is_aux(const struct timekeeper *tk) +{ + return false; +} #endif /* flag for if timekeeping is suspended */ @@ -719,6 +729,8 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); + } else if (tk_is_aux(tk)) { + vdso_time_update_aux(tk); } if (action & TK_CLOCK_WAS_SET) diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index df6bada2d58e..8ba8b0d8a387 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -136,6 +136,46 @@ void update_vsyscall_tz(void) __arch_sync_vdso_time_data(vdata); } +#ifdef CONFIG_POSIX_AUX_CLOCKS +void vdso_time_update_aux(struct timekeeper *tk) +{ + struct vdso_time_data *vdata = vdso_k_time_data; + struct vdso_timestamp *vdso_ts; + struct vdso_clock *vc; + s32 clock_mode; + u64 nsec; + + vc = &vdata->aux_clock_data[tk->id - TIMEKEEPER_AUX_FIRST]; + vdso_ts = &vc->basetime[VDSO_BASE_AUX]; + clock_mode = tk->tkr_mono.clock->vdso_clock_mode; + if (!tk->clock_valid) + clock_mode = VDSO_CLOCKMODE_NONE; + + /* copy vsyscall data */ + vdso_write_begin_clock(vc); + + vc->clock_mode = clock_mode; + + if (clock_mode != VDSO_CLOCKMODE_NONE) { + fill_clock_configuration(vc, &tk->tkr_mono); + + vdso_ts->sec = tk->xtime_sec; + + nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec += tk->offs_aux; + vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); + nsec = nsec << tk->tkr_mono.shift; + vdso_ts->nsec = nsec; + } + + __arch_update_vdso_clock(vc); + + vdso_write_end_clock(vc); + + __arch_sync_vdso_time_data(vdata); +} +#endif + /** * vdso_update_begin - Start of a VDSO update section * -- cgit v1.2.3