From 751addac78b6f205ffd47c8736ca6d429dc77703 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Oct 2019 22:53:19 +0200 Subject: y2038: remove obsolete jiffies conversion functions Now that the last user of timespec_to_jiffies() is gone, these can just be removed, everything else is using ktime_t or timespec64 already. Signed-off-by: Arnd Bergmann --- kernel/time/time.c | 58 +++++------------------------------------------------- 1 file changed, 5 insertions(+), 53 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/time.c b/kernel/time/time.c index 704ccd9451b0..cdd7386115ff 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -626,10 +626,12 @@ EXPORT_SYMBOL(__usecs_to_jiffies); * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec * value to a scaled second value. */ -static unsigned long -__timespec64_to_jiffies(u64 sec, long nsec) + +unsigned long +timespec64_to_jiffies(const struct timespec64 *value) { - nsec = nsec + TICK_NSEC - 1; + u64 sec = value->tv_sec; + long nsec = value->tv_nsec + TICK_NSEC - 1; if (sec >= MAX_SEC_IN_JIFFIES){ sec = MAX_SEC_IN_JIFFIES; @@ -640,18 +642,6 @@ __timespec64_to_jiffies(u64 sec, long nsec) (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } - -static unsigned long -__timespec_to_jiffies(unsigned long sec, long nsec) -{ - return __timespec64_to_jiffies((u64)sec, nsec); -} - -unsigned long -timespec64_to_jiffies(const struct timespec64 *value) -{ - return __timespec64_to_jiffies(value->tv_sec, value->tv_nsec); -} EXPORT_SYMBOL(timespec64_to_jiffies); void @@ -668,44 +658,6 @@ jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value) } EXPORT_SYMBOL(jiffies_to_timespec64); -/* - * We could use a similar algorithm to timespec_to_jiffies (with a - * different multiplier for usec instead of nsec). But this has a - * problem with rounding: we can't exactly add TICK_NSEC - 1 to the - * usec value, since it's not necessarily integral. - * - * We could instead round in the intermediate scaled representation - * (i.e. in units of 1/2^(large scale) jiffies) but that's also - * perilous: the scaling introduces a small positive error, which - * combined with a division-rounding-upward (i.e. adding 2^(scale) - 1 - * units to the intermediate before shifting) leads to accidental - * overflow and overestimates. - * - * At the cost of one additional multiplication by a constant, just - * use the timespec implementation. - */ -unsigned long -timeval_to_jiffies(const struct timeval *value) -{ - return __timespec_to_jiffies(value->tv_sec, - value->tv_usec * NSEC_PER_USEC); -} -EXPORT_SYMBOL(timeval_to_jiffies); - -void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) -{ - /* - * Convert jiffies to nanoseconds and separate with - * one divide. - */ - u32 rem; - - value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC, - NSEC_PER_SEC, &rem); - value->tv_usec = rem / NSEC_PER_USEC; -} -EXPORT_SYMBOL(jiffies_to_timeval); - /* * Convert jiffies/jiffies_64 to clock_t and back. */ -- cgit v1.2.3 From 4f9fbd893fe83a1193adceca41c8f7aa6c7382a1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Nov 2019 15:53:29 +0100 Subject: y2038: rename itimerval to __kernel_old_itimerval Take the renaming of timeval and timespec one level further, also renaming itimerval to __kernel_old_itimerval, to avoid namespace conflicts with the user-space structure that may use 64-bit time_t members. Signed-off-by: Arnd Bergmann --- kernel/time/itimer.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 9e59c9ea92aa..ca4e6d57d68b 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -97,20 +97,20 @@ static int do_getitimer(int which, struct itimerspec64 *value) return 0; } -static int put_itimerval(struct itimerval __user *o, +static int put_itimerval(struct __kernel_old_itimerval __user *o, const struct itimerspec64 *i) { - struct itimerval v; + struct __kernel_old_itimerval v; v.it_interval.tv_sec = i->it_interval.tv_sec; v.it_interval.tv_usec = i->it_interval.tv_nsec / NSEC_PER_USEC; v.it_value.tv_sec = i->it_value.tv_sec; v.it_value.tv_usec = i->it_value.tv_nsec / NSEC_PER_USEC; - return copy_to_user(o, &v, sizeof(struct itimerval)) ? -EFAULT : 0; + return copy_to_user(o, &v, sizeof(struct __kernel_old_itimerval)) ? -EFAULT : 0; } -SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value) +SYSCALL_DEFINE2(getitimer, int, which, struct __kernel_old_itimerval __user *, value) { struct itimerspec64 get_buffer; int error = do_getitimer(which, &get_buffer); @@ -314,11 +314,11 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) #endif -static int get_itimerval(struct itimerspec64 *o, const struct itimerval __user *i) +static int get_itimerval(struct itimerspec64 *o, const struct __kernel_old_itimerval __user *i) { - struct itimerval v; + struct __kernel_old_itimerval v; - if (copy_from_user(&v, i, sizeof(struct itimerval))) + if (copy_from_user(&v, i, sizeof(struct __kernel_old_itimerval))) return -EFAULT; /* Validate the timevals in value. */ @@ -333,8 +333,8 @@ static int get_itimerval(struct itimerspec64 *o, const struct itimerval __user * return 0; } -SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, - struct itimerval __user *, ovalue) +SYSCALL_DEFINE3(setitimer, int, which, struct __kernel_old_itimerval __user *, value, + struct __kernel_old_itimerval __user *, ovalue) { struct itimerspec64 set_buffer, get_buffer; int error; -- cgit v1.2.3 From f35deaff1b8eadb9897e4fb8b3edc7717f4ec6fa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 7 Dec 2019 20:10:26 +0100 Subject: time/posix-stubs: Provide compat itimer supoprt for alpha Using compat_sys_getitimer and compat_sys_setitimer on alpha causes a link failure in the Alpha tinyconfig and other configurations that turn off CONFIG_POSIX_TIMERS. Use the same #ifdef check for the stub version as well. Fixes: 4c22ea2b9120 ("y2038: use compat_{get,set}_itimer on alpha") Reported-by: Guenter Roeck Reported-by: kbuild test robot Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20191207191043.656328-1-arnd@arndb.de --- kernel/time/posix-stubs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 67df65f887ac..20c65a7d4e3a 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -151,6 +151,9 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, #ifdef CONFIG_COMPAT COMPAT_SYS_NI(timer_create); +#endif + +#if defined(CONFIG_COMPAT) || defined(CONFIG_ALPHA) COMPAT_SYS_NI(getitimer); COMPAT_SYS_NI(setitimer); #endif -- cgit v1.2.3 From 2707745533d6d38fa7d3a2212f1fd599c3879491 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 7 Jan 2020 02:06:29 +0100 Subject: time/sched_clock: Disable interrupts in sched_clock_register() Instead of issueing a warning if sched_clock_register() is called from a context where IRQs are enabled, the code now ensures that IRQs are indeed disabled. Signed-off-by: Paul Cercueil Signed-off-by: Thomas Gleixner Acked-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200107010630.954648-1-paul@crapouillou.net --- kernel/time/sched_clock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index dbd69052eaa6..e4332e3e2d56 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -169,14 +169,15 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; - unsigned long r; + unsigned long r, flags; char r_unit; struct clock_read_data rd; if (cd.rate > rate) return; - WARN_ON(!irqs_disabled()); + /* Cannot register a sched_clock with interrupts on */ + local_irq_save(flags); /* Calculate the mult/shift to convert counter ticks to ns. */ clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); @@ -233,6 +234,8 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) enable_sched_clock_irqtime(); + local_irq_restore(flags); + pr_debug("Registered %pS as sched_clock source\n", read); } -- cgit v1.2.3 From 769071ac9f20b6a447410c7eaa55d1a5233ef40c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:26:52 +0000 Subject: ns: Introduce Time Namespace Time Namespace isolates clock values. The kernel provides access to several clocks CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, etc. CLOCK_REALTIME System-wide clock that measures real (i.e., wall-clock) time. CLOCK_MONOTONIC Clock that cannot be set and represents monotonic time since some unspecified starting point. CLOCK_BOOTTIME Identical to CLOCK_MONOTONIC, except it also includes any time that the system is suspended. For many users, the time namespace means the ability to changes date and time in a container (CLOCK_REALTIME). Providing per namespace notions of CLOCK_REALTIME would be complex with a massive overhead, but has a dubious value. But in the context of checkpoint/restore functionality, monotonic and boottime clocks become interesting. Both clocks are monotonic with unspecified starting points. These clocks are widely used to measure time slices and set timers. After restoring or migrating processes, it has to be guaranteed that they never go backward. In an ideal case, the behavior of these clocks should be the same as for a case when a whole system is suspended. All this means that it is required to set CLOCK_MONOTONIC and CLOCK_BOOTTIME clocks, which can be achieved by adding per-namespace offsets for clocks. A time namespace is similar to a pid namespace in the way how it is created: unshare(CLONE_NEWTIME) system call creates a new time namespace, but doesn't set it to the current process. Then all children of the process will be born in the new time namespace, or a process can use the setns() system call to join a namespace. This scheme allows setting clock offsets for a namespace, before any processes appear in it. All available clone flags have been used, so CLONE_NEWTIME uses the highest bit of CSIGNAL. It means that it can be used only with the unshare() and the clone3() system calls. [ tglx: Adjusted paragraph about clone3() to reality and massaged the changelog a bit. ] Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://criu.org/Time_namespace Link: https://lists.openvz.org/pipermail/criu/2018-June/041504.html Link: https://lore.kernel.org/r/20191112012724.250792-4-dima@arista.com --- kernel/time/Makefile | 1 + kernel/time/namespace.c | 217 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 kernel/time/namespace.c (limited to 'kernel/time') diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 1867044800bb..c8f00168afe8 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -19,3 +19,4 @@ obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o +obj-$(CONFIG_TIME_NS) += namespace.o diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c new file mode 100644 index 000000000000..2662a69e0382 --- /dev/null +++ b/kernel/time/namespace.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Andrei Vagin + * Author: Dmitry Safonov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct ucounts *inc_time_namespaces(struct user_namespace *ns) +{ + return inc_ucount(ns, current_euid(), UCOUNT_TIME_NAMESPACES); +} + +static void dec_time_namespaces(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_TIME_NAMESPACES); +} + +/** + * clone_time_ns - Clone a time namespace + * @user_ns: User namespace which owns a new namespace. + * @old_ns: Namespace to clone + * + * Clone @old_ns and set the clone refcount to 1 + * + * Return: The new namespace or ERR_PTR. + */ +static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, + struct time_namespace *old_ns) +{ + struct time_namespace *ns; + struct ucounts *ucounts; + int err; + + err = -ENOSPC; + ucounts = inc_time_namespaces(user_ns); + if (!ucounts) + goto fail; + + err = -ENOMEM; + ns = kmalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) + goto fail_dec; + + kref_init(&ns->kref); + + err = ns_alloc_inum(&ns->ns); + if (err) + goto fail_free; + + ns->ucounts = ucounts; + ns->ns.ops = &timens_operations; + ns->user_ns = get_user_ns(user_ns); + return ns; + +fail_free: + kfree(ns); +fail_dec: + dec_time_namespaces(ucounts); +fail: + return ERR_PTR(err); +} + +/** + * copy_time_ns - Create timens_for_children from @old_ns + * @flags: Cloning flags + * @user_ns: User namespace which owns a new namespace. + * @old_ns: Namespace to clone + * + * If CLONE_NEWTIME specified in @flags, creates a new timens_for_children; + * adds a refcounter to @old_ns otherwise. + * + * Return: timens_for_children namespace or ERR_PTR. + */ +struct time_namespace *copy_time_ns(unsigned long flags, + struct user_namespace *user_ns, struct time_namespace *old_ns) +{ + if (!(flags & CLONE_NEWTIME)) + return get_time_ns(old_ns); + + return clone_time_ns(user_ns, old_ns); +} + +void free_time_ns(struct kref *kref) +{ + struct time_namespace *ns; + + ns = container_of(kref, struct time_namespace, kref); + dec_time_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); + ns_free_inum(&ns->ns); + kfree(ns); +} + +static struct time_namespace *to_time_ns(struct ns_common *ns) +{ + return container_of(ns, struct time_namespace, ns); +} + +static struct ns_common *timens_get(struct task_struct *task) +{ + struct time_namespace *ns = NULL; + struct nsproxy *nsproxy; + + task_lock(task); + nsproxy = task->nsproxy; + if (nsproxy) { + ns = nsproxy->time_ns; + get_time_ns(ns); + } + task_unlock(task); + + return ns ? &ns->ns : NULL; +} + +static struct ns_common *timens_for_children_get(struct task_struct *task) +{ + struct time_namespace *ns = NULL; + struct nsproxy *nsproxy; + + task_lock(task); + nsproxy = task->nsproxy; + if (nsproxy) { + ns = nsproxy->time_ns_for_children; + get_time_ns(ns); + } + task_unlock(task); + + return ns ? &ns->ns : NULL; +} + +static void timens_put(struct ns_common *ns) +{ + put_time_ns(to_time_ns(ns)); +} + +static int timens_install(struct nsproxy *nsproxy, struct ns_common *new) +{ + struct time_namespace *ns = to_time_ns(new); + + if (!current_is_single_threaded()) + return -EUSERS; + + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || + !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) + return -EPERM; + + get_time_ns(ns); + put_time_ns(nsproxy->time_ns); + nsproxy->time_ns = ns; + + get_time_ns(ns); + put_time_ns(nsproxy->time_ns_for_children); + nsproxy->time_ns_for_children = ns; + return 0; +} + +int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) +{ + struct ns_common *nsc = &nsproxy->time_ns_for_children->ns; + struct time_namespace *ns = to_time_ns(nsc); + + /* create_new_namespaces() already incremented the ref counter */ + if (nsproxy->time_ns == nsproxy->time_ns_for_children) + return 0; + + get_time_ns(ns); + put_time_ns(nsproxy->time_ns); + nsproxy->time_ns = ns; + + return 0; +} + +static struct user_namespace *timens_owner(struct ns_common *ns) +{ + return to_time_ns(ns)->user_ns; +} + +const struct proc_ns_operations timens_operations = { + .name = "time", + .type = CLONE_NEWTIME, + .get = timens_get, + .put = timens_put, + .install = timens_install, + .owner = timens_owner, +}; + +const struct proc_ns_operations timens_for_children_operations = { + .name = "time_for_children", + .type = CLONE_NEWTIME, + .get = timens_for_children_get, + .put = timens_put, + .install = timens_install, + .owner = timens_owner, +}; + +struct time_namespace init_time_ns = { + .kref = KREF_INIT(3), + .user_ns = &init_user_ns, + .ns.inum = PROC_TIME_INIT_INO, + .ns.ops = &timens_operations, +}; + +static int __init time_ns_init(void) +{ + return 0; +} +subsys_initcall(time_ns_init); -- cgit v1.2.3 From af993f58d69ee9c1f421dfc87c3ed231c113989c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:26:53 +0000 Subject: time: Add timens_offsets to be used for tasks in time namespace Introduce offsets for time namespace. They will contain an adjustment needed to convert clocks to/from host's. A new namespace is created with the same offsets as the time namespace of the current process. Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-5-dima@arista.com --- kernel/time/namespace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 2662a69e0382..c2a58e45fc4b 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -14,6 +14,7 @@ #include #include #include +#include static struct ucounts *inc_time_namespaces(struct user_namespace *ns) { @@ -60,6 +61,7 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, ns->ucounts = ucounts; ns->ns.ops = &timens_operations; ns->user_ns = get_user_ns(user_ns); + ns->offsets = old_ns->offsets; return ns; fail_free: -- cgit v1.2.3 From 819a95fe3adfc7b558bfd96dd5ac589c4f543fd4 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:26:54 +0000 Subject: posix-clocks: Rename the clock_get() callback to clock_get_timespec() The upcoming support for time namespaces requires to have access to: - The time in a task's time namespace for sys_clock_gettime() - The time in the root name space for common_timer_get() That adds a valid reason to finally implement a separate callback which returns the time in ktime_t format, rather than in (struct timespec). Rename the clock_get() callback to clock_get_timespec() as a preparation for introducing clock_get_ktime(). Suggested-by: Thomas Gleixner Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-6-dima@arista.com --- kernel/time/alarmtimer.c | 4 ++-- kernel/time/posix-clock.c | 8 ++++---- kernel/time/posix-cpu-timers.c | 32 ++++++++++++++++---------------- kernel/time/posix-timers.c | 22 +++++++++++----------- kernel/time/posix-timers.h | 4 ++-- 5 files changed, 35 insertions(+), 35 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 451f9d05ccfe..8523df726fee 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -657,7 +657,7 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec64 *tp } /** - * alarm_clock_get - posix clock_get interface + * alarm_clock_get - posix clock_get_timespec interface * @which_clock: clockid * @tp: timespec to fill. * @@ -837,7 +837,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, const struct k_clock alarm_clock = { .clock_getres = alarm_clock_getres, - .clock_get = alarm_clock_get, + .clock_get_timespec = alarm_clock_get, .timer_create = alarm_timer_create, .timer_set = common_timer_set, .timer_del = common_timer_del, diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 200fb2d3be99..77c0c2370b6d 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -310,8 +310,8 @@ out: } const struct k_clock clock_posix_dynamic = { - .clock_getres = pc_clock_getres, - .clock_set = pc_clock_settime, - .clock_get = pc_clock_gettime, - .clock_adj = pc_clock_adjtime, + .clock_getres = pc_clock_getres, + .clock_set = pc_clock_settime, + .clock_get_timespec = pc_clock_gettime, + .clock_adj = pc_clock_adjtime, }; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 42d512fcfda2..8ff6da77a01f 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1391,26 +1391,26 @@ static int thread_cpu_timer_create(struct k_itimer *timer) } const struct k_clock clock_posix_cpu = { - .clock_getres = posix_cpu_clock_getres, - .clock_set = posix_cpu_clock_set, - .clock_get = posix_cpu_clock_get, - .timer_create = posix_cpu_timer_create, - .nsleep = posix_cpu_nsleep, - .timer_set = posix_cpu_timer_set, - .timer_del = posix_cpu_timer_del, - .timer_get = posix_cpu_timer_get, - .timer_rearm = posix_cpu_timer_rearm, + .clock_getres = posix_cpu_clock_getres, + .clock_set = posix_cpu_clock_set, + .clock_get_timespec = posix_cpu_clock_get, + .timer_create = posix_cpu_timer_create, + .nsleep = posix_cpu_nsleep, + .timer_set = posix_cpu_timer_set, + .timer_del = posix_cpu_timer_del, + .timer_get = posix_cpu_timer_get, + .timer_rearm = posix_cpu_timer_rearm, }; const struct k_clock clock_process = { - .clock_getres = process_cpu_clock_getres, - .clock_get = process_cpu_clock_get, - .timer_create = process_cpu_timer_create, - .nsleep = process_cpu_nsleep, + .clock_getres = process_cpu_clock_getres, + .clock_get_timespec = process_cpu_clock_get, + .timer_create = process_cpu_timer_create, + .nsleep = process_cpu_nsleep, }; const struct k_clock clock_thread = { - .clock_getres = thread_cpu_clock_getres, - .clock_get = thread_cpu_clock_get, - .timer_create = thread_cpu_timer_create, + .clock_getres = thread_cpu_clock_getres, + .clock_get_timespec = thread_cpu_clock_get, + .timer_create = thread_cpu_timer_create, }; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 0ec5b7a1d769..44d4f9cb782d 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -667,7 +667,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) * The timespec64 based conversion is suboptimal, but it's not * worth to implement yet another callback. */ - kc->clock_get(timr->it_clock, &ts64); + kc->clock_get_timespec(timr->it_clock, &ts64); now = timespec64_to_ktime(ts64); /* @@ -781,7 +781,7 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, * Posix magic: Relative CLOCK_REALTIME timers are not affected by * clock modifications, so they become CLOCK_MONOTONIC based under the * hood. See hrtimer_init(). Update timr->kclock, so the generic - * functions which use timr->kclock->clock_get() work. + * functions which use timr->kclock->clock_get_timespec() work. * * Note: it_clock stays unmodified, because the next timer_set() might * use ABSTIME, so it needs to switch back. @@ -1067,7 +1067,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, if (!kc) return -EINVAL; - error = kc->clock_get(which_clock, &kernel_tp); + error = kc->clock_get_timespec(which_clock, &kernel_tp); if (!error && put_timespec64(&kernel_tp, tp)) error = -EFAULT; @@ -1149,7 +1149,7 @@ SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, if (!kc) return -EINVAL; - err = kc->clock_get(which_clock, &ts); + err = kc->clock_get_timespec(which_clock, &ts); if (!err && put_old_timespec32(&ts, tp)) err = -EFAULT; @@ -1261,7 +1261,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, static const struct k_clock clock_realtime = { .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_clock_realtime_get, + .clock_get_timespec = posix_clock_realtime_get, .clock_set = posix_clock_realtime_set, .clock_adj = posix_clock_realtime_adj, .nsleep = common_nsleep, @@ -1279,7 +1279,7 @@ static const struct k_clock clock_realtime = { static const struct k_clock clock_monotonic = { .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_ktime_get_ts, + .clock_get_timespec = posix_ktime_get_ts, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, @@ -1295,22 +1295,22 @@ static const struct k_clock clock_monotonic = { static const struct k_clock clock_monotonic_raw = { .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_get_monotonic_raw, + .clock_get_timespec = posix_get_monotonic_raw, }; static const struct k_clock clock_realtime_coarse = { .clock_getres = posix_get_coarse_res, - .clock_get = posix_get_realtime_coarse, + .clock_get_timespec = posix_get_realtime_coarse, }; static const struct k_clock clock_monotonic_coarse = { .clock_getres = posix_get_coarse_res, - .clock_get = posix_get_monotonic_coarse, + .clock_get_timespec = posix_get_monotonic_coarse, }; static const struct k_clock clock_tai = { .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_get_tai, + .clock_get_timespec = posix_get_tai, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, @@ -1326,7 +1326,7 @@ static const struct k_clock clock_tai = { static const struct k_clock clock_boottime = { .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_get_boottime, + .clock_get_timespec = posix_get_boottime, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index 897c29e162b9..070611b2c253 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -6,8 +6,8 @@ struct k_clock { struct timespec64 *tp); int (*clock_set)(const clockid_t which_clock, const struct timespec64 *tp); - int (*clock_get)(const clockid_t which_clock, - struct timespec64 *tp); + int (*clock_get_timespec)(const clockid_t which_clock, + struct timespec64 *tp); int (*clock_adj)(const clockid_t which_clock, struct __kernel_timex *tx); int (*timer_create)(struct k_itimer *timer); int (*nsleep)(const clockid_t which_clock, int flags, -- cgit v1.2.3 From eaf80194d0fe48be393587541c48a799a9a06a70 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:26:55 +0000 Subject: posix-clocks: Rename .clock_get_timespec() callbacks accordingly The upcoming support for time namespaces requires to have access to: - The time in a task's time namespace for sys_clock_gettime() - The time in the root name space for common_timer_get() That adds a valid reason to finally implement a separate callback which returns the time in ktime_t format in (struct k_clock). As a preparation ground for introducing clock_get_ktime(), the original callback clock_get() was renamed into clock_get_timespec(). Reflect the renaming into the callback implementations. Suggested-by: Thomas Gleixner Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-7-dima@arista.com --- kernel/time/alarmtimer.c | 6 +++--- kernel/time/posix-timers.c | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 8523df726fee..62b06cfa710d 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -657,13 +657,13 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec64 *tp } /** - * alarm_clock_get - posix clock_get_timespec interface + * alarm_clock_get_timespec - posix clock_get_timespec interface * @which_clock: clockid * @tp: timespec to fill. * * Provides the underlying alarm base time. */ -static int alarm_clock_get(clockid_t which_clock, struct timespec64 *tp) +static int alarm_clock_get_timespec(clockid_t which_clock, struct timespec64 *tp) { struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; @@ -837,7 +837,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, const struct k_clock alarm_clock = { .clock_getres = alarm_clock_getres, - .clock_get_timespec = alarm_clock_get, + .clock_get_timespec = alarm_clock_get_timespec, .timer_create = alarm_timer_create, .timer_set = common_timer_set, .timer_del = common_timer_del, diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 44d4f9cb782d..68d4690cc225 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -165,7 +165,7 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) } /* Get clock_realtime */ -static int posix_clock_realtime_get(clockid_t which_clock, struct timespec64 *tp) +static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_real_ts64(tp); return 0; @@ -187,7 +187,7 @@ static int posix_clock_realtime_adj(const clockid_t which_clock, /* * Get monotonic time for posix timers */ -static int posix_ktime_get_ts(clockid_t which_clock, struct timespec64 *tp) +static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_ts64(tp); return 0; @@ -222,13 +222,13 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 * return 0; } -static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp) +static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp) { ktime_get_boottime_ts64(tp); return 0; } -static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) +static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_clocktai_ts64(tp); return 0; @@ -1261,7 +1261,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, static const struct k_clock clock_realtime = { .clock_getres = posix_get_hrtimer_res, - .clock_get_timespec = posix_clock_realtime_get, + .clock_get_timespec = posix_get_realtime_timespec, .clock_set = posix_clock_realtime_set, .clock_adj = posix_clock_realtime_adj, .nsleep = common_nsleep, @@ -1279,7 +1279,7 @@ static const struct k_clock clock_realtime = { static const struct k_clock clock_monotonic = { .clock_getres = posix_get_hrtimer_res, - .clock_get_timespec = posix_ktime_get_ts, + .clock_get_timespec = posix_get_monotonic_timespec, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, @@ -1310,7 +1310,7 @@ static const struct k_clock clock_monotonic_coarse = { static const struct k_clock clock_tai = { .clock_getres = posix_get_hrtimer_res, - .clock_get_timespec = posix_get_tai, + .clock_get_timespec = posix_get_tai_timespec, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, @@ -1326,7 +1326,7 @@ static const struct k_clock clock_tai = { static const struct k_clock clock_boottime = { .clock_getres = posix_get_hrtimer_res, - .clock_get_timespec = posix_get_boottime, + .clock_get_timespec = posix_get_boottime_timespec, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, -- cgit v1.2.3 From 41b3b8dffc1f84e581addfbc09bec0289db3315e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:26:56 +0000 Subject: alarmtimer: Rename gettime() callback to get_ktime() The upcoming support for time namespaces requires to have access to: - The time in a tasks time namespace for sys_clock_gettime() - The time in the root name space for common_timer_get() struct alarm_base needs to follow the same naming convention, so rename .gettime() callback into get_ktime() as a preparation for introducing get_timespec(). Suggested-by: Thomas Gleixner Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-8-dima@arista.com --- kernel/time/alarmtimer.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 62b06cfa710d..22b6f9b133b2 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -36,13 +36,13 @@ * struct alarm_base - Alarm timer bases * @lock: Lock for syncrhonized access to the base * @timerqueue: Timerqueue head managing the list of events - * @gettime: Function to read the time correlating to the base + * @get_ktime: Function to read the time correlating to the base * @base_clockid: clockid for the base */ static struct alarm_base { spinlock_t lock; struct timerqueue_head timerqueue; - ktime_t (*gettime)(void); + ktime_t (*get_ktime)(void); clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; @@ -207,7 +207,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) spin_unlock_irqrestore(&base->lock, flags); if (alarm->function) - restart = alarm->function(alarm, base->gettime()); + restart = alarm->function(alarm, base->get_ktime()); spin_lock_irqsave(&base->lock, flags); if (restart != ALARMTIMER_NORESTART) { @@ -217,7 +217,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } spin_unlock_irqrestore(&base->lock, flags); - trace_alarmtimer_fired(alarm, base->gettime()); + trace_alarmtimer_fired(alarm, base->get_ktime()); return ret; } @@ -225,7 +225,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) ktime_t alarm_expires_remaining(const struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; - return ktime_sub(alarm->node.expires, base->gettime()); + return ktime_sub(alarm->node.expires, base->get_ktime()); } EXPORT_SYMBOL_GPL(alarm_expires_remaining); @@ -270,7 +270,7 @@ static int alarmtimer_suspend(struct device *dev) spin_unlock_irqrestore(&base->lock, flags); if (!next) continue; - delta = ktime_sub(next->expires, base->gettime()); + delta = ktime_sub(next->expires, base->get_ktime()); if (!min || (delta < min)) { expires = next->expires; min = delta; @@ -364,7 +364,7 @@ void alarm_start(struct alarm *alarm, ktime_t start) hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS); spin_unlock_irqrestore(&base->lock, flags); - trace_alarmtimer_start(alarm, base->gettime()); + trace_alarmtimer_start(alarm, base->get_ktime()); } EXPORT_SYMBOL_GPL(alarm_start); @@ -377,7 +377,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; - start = ktime_add_safe(start, base->gettime()); + start = ktime_add_safe(start, base->get_ktime()); alarm_start(alarm, start); } EXPORT_SYMBOL_GPL(alarm_start_relative); @@ -414,7 +414,7 @@ int alarm_try_to_cancel(struct alarm *alarm) alarmtimer_dequeue(base, alarm); spin_unlock_irqrestore(&base->lock, flags); - trace_alarmtimer_cancel(alarm, base->gettime()); + trace_alarmtimer_cancel(alarm, base->get_ktime()); return ret; } EXPORT_SYMBOL_GPL(alarm_try_to_cancel); @@ -474,7 +474,7 @@ u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) { struct alarm_base *base = &alarm_bases[alarm->type]; - return alarm_forward(alarm, base->gettime(), interval); + return alarm_forward(alarm, base->get_ktime(), interval); } EXPORT_SYMBOL_GPL(alarm_forward_now); @@ -500,7 +500,7 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) return; } - delta = ktime_sub(absexp, base->gettime()); + delta = ktime_sub(absexp, base->get_ktime()); spin_lock_irqsave(&freezer_delta_lock, flags); if (!freezer_delta || (delta < freezer_delta)) { @@ -632,7 +632,7 @@ static void alarm_timer_arm(struct k_itimer *timr, ktime_t expires, struct alarm_base *base = &alarm_bases[alarm->type]; if (!absolute) - expires = ktime_add_safe(expires, base->gettime()); + expires = ktime_add_safe(expires, base->get_ktime()); if (sigev_none) alarm->node.expires = expires; else @@ -670,7 +670,7 @@ static int alarm_clock_get_timespec(clockid_t which_clock, struct timespec64 *tp if (!alarmtimer_get_rtcdev()) return -EINVAL; - *tp = ktime_to_timespec64(base->gettime()); + *tp = ktime_to_timespec64(base->get_ktime()); return 0; } @@ -747,7 +747,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp, struct timespec64 rmt; ktime_t rem; - rem = ktime_sub(absexp, alarm_bases[type].gettime()); + rem = ktime_sub(absexp, alarm_bases[type].get_ktime()); if (rem <= 0) return 0; @@ -816,7 +816,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, exp = timespec64_to_ktime(*tsreq); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { - ktime_t now = alarm_bases[type].gettime(); + ktime_t now = alarm_bases[type].get_ktime(); exp = ktime_add_safe(now, exp); } @@ -882,9 +882,9 @@ static int __init alarmtimer_init(void) /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; - alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real; + alarm_bases[ALARM_REALTIME].get_ktime = &ktime_get_real; alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; - alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime; + alarm_bases[ALARM_BOOTTIME].get_ktime = &ktime_get_boottime; for (i = 0; i < ALARM_NUMTYPE; i++) { timerqueue_init_head(&alarm_bases[i].timerqueue); spin_lock_init(&alarm_bases[i].lock); -- cgit v1.2.3 From 2f58bf909abf9670fa4e848b433dc12ba4c2a44e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:26:57 +0000 Subject: alarmtimer: Provide get_timespec() callback The upcoming support for time namespaces requires to have access to: - The time in a task's time namespace for sys_clock_gettime() - The time in the root name space for common_timer_get() Wire up alarm bases with get_timespec(). Suggested-by: Thomas Gleixner Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-9-dima@arista.com --- kernel/time/alarmtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 22b6f9b133b2..357be1fe6e1f 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -37,12 +37,14 @@ * @lock: Lock for syncrhonized access to the base * @timerqueue: Timerqueue head managing the list of events * @get_ktime: Function to read the time correlating to the base + * @get_timespec: Function to read the namespace time correlating to the base * @base_clockid: clockid for the base */ static struct alarm_base { spinlock_t lock; struct timerqueue_head timerqueue; ktime_t (*get_ktime)(void); + void (*get_timespec)(struct timespec64 *tp); clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; @@ -670,7 +672,8 @@ static int alarm_clock_get_timespec(clockid_t which_clock, struct timespec64 *tp if (!alarmtimer_get_rtcdev()) return -EINVAL; - *tp = ktime_to_timespec64(base->get_ktime()); + base->get_timespec(tp); + return 0; } @@ -883,8 +886,10 @@ static int __init alarmtimer_init(void) /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; alarm_bases[ALARM_REALTIME].get_ktime = &ktime_get_real; + alarm_bases[ALARM_REALTIME].get_timespec = ktime_get_real_ts64, alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; alarm_bases[ALARM_BOOTTIME].get_ktime = &ktime_get_boottime; + alarm_bases[ALARM_BOOTTIME].get_timespec = ktime_get_boottime_ts64; for (i = 0; i < ALARM_NUMTYPE; i++) { timerqueue_init_head(&alarm_bases[i].timerqueue); spin_lock_init(&alarm_bases[i].lock); -- cgit v1.2.3 From 9c71a2e8a757bc6aee256bc97c6fb711144b0a0f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:26:58 +0000 Subject: posix-clocks: Introduce clock_get_ktime() callback The callsite in common_timer_get() has already a comment: /* * The timespec64 based conversion is suboptimal, but it's not * worth to implement yet another callback. */ kc->clock_get(timr->it_clock, &ts64); now = timespec64_to_ktime(ts64); The upcoming support for time namespaces requires to have access to: - The time in a task's time namespace for sys_clock_gettime() - The time in the root name space for common_timer_get() That adds a valid reason to finally implement a separate callback which returns the time in ktime_t format. Suggested-by: Thomas Gleixner Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-10-dima@arista.com --- kernel/time/alarmtimer.c | 19 ++++++++++++++++++- kernel/time/posix-timers.c | 26 +++++++++++++++++++++++++- kernel/time/posix-timers.h | 3 +++ 3 files changed, 46 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 357be1fe6e1f..4d8c90546635 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -663,7 +663,7 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec64 *tp * @which_clock: clockid * @tp: timespec to fill. * - * Provides the underlying alarm base time. + * Provides the underlying alarm base time in a tasks time namespace. */ static int alarm_clock_get_timespec(clockid_t which_clock, struct timespec64 *tp) { @@ -677,6 +677,22 @@ static int alarm_clock_get_timespec(clockid_t which_clock, struct timespec64 *tp return 0; } +/** + * alarm_clock_get_ktime - posix clock_get_ktime interface + * @which_clock: clockid + * + * Provides the underlying alarm base time in the root namespace. + */ +static ktime_t alarm_clock_get_ktime(clockid_t which_clock) +{ + struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; + + if (!alarmtimer_get_rtcdev()) + return -EINVAL; + + return base->get_ktime(); +} + /** * alarm_timer_create - posix timer_create interface * @new_timer: k_itimer pointer to manage @@ -840,6 +856,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, const struct k_clock alarm_clock = { .clock_getres = alarm_clock_getres, + .clock_get_ktime = alarm_clock_get_ktime, .clock_get_timespec = alarm_clock_get_timespec, .timer_create = alarm_timer_create, .timer_set = common_timer_set, diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 68d4690cc225..a1f6b968c5d8 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -171,6 +171,11 @@ static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 return 0; } +static ktime_t posix_get_realtime_ktime(clockid_t which_clock) +{ + return ktime_get_real(); +} + /* Set clock_realtime */ static int posix_clock_realtime_set(const clockid_t which_clock, const struct timespec64 *tp) @@ -193,6 +198,11 @@ static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 return 0; } +static ktime_t posix_get_monotonic_ktime(clockid_t which_clock) +{ + return ktime_get(); +} + /* * Get monotonic-raw time for posix timers */ @@ -228,12 +238,22 @@ static int posix_get_boottime_timespec(const clockid_t which_clock, struct times return 0; } +static ktime_t posix_get_boottime_ktime(const clockid_t which_clock) +{ + return ktime_get_boottime(); +} + static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_clocktai_ts64(tp); return 0; } +static ktime_t posix_get_tai_ktime(clockid_t which_clock) +{ + return ktime_get_clocktai(); +} + static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) { tp->tv_sec = 0; @@ -781,7 +801,7 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, * Posix magic: Relative CLOCK_REALTIME timers are not affected by * clock modifications, so they become CLOCK_MONOTONIC based under the * hood. See hrtimer_init(). Update timr->kclock, so the generic - * functions which use timr->kclock->clock_get_timespec() work. + * functions which use timr->kclock->clock_get_*() work. * * Note: it_clock stays unmodified, because the next timer_set() might * use ABSTIME, so it needs to switch back. @@ -1262,6 +1282,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, static const struct k_clock clock_realtime = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_realtime_timespec, + .clock_get_ktime = posix_get_realtime_ktime, .clock_set = posix_clock_realtime_set, .clock_adj = posix_clock_realtime_adj, .nsleep = common_nsleep, @@ -1280,6 +1301,7 @@ static const struct k_clock clock_realtime = { static const struct k_clock clock_monotonic = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_monotonic_timespec, + .clock_get_ktime = posix_get_monotonic_ktime, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, @@ -1310,6 +1332,7 @@ static const struct k_clock clock_monotonic_coarse = { static const struct k_clock clock_tai = { .clock_getres = posix_get_hrtimer_res, + .clock_get_ktime = posix_get_tai_ktime, .clock_get_timespec = posix_get_tai_timespec, .nsleep = common_nsleep, .timer_create = common_timer_create, @@ -1326,6 +1349,7 @@ static const struct k_clock clock_tai = { static const struct k_clock clock_boottime = { .clock_getres = posix_get_hrtimer_res, + .clock_get_ktime = posix_get_boottime_ktime, .clock_get_timespec = posix_get_boottime_timespec, .nsleep = common_nsleep, .timer_create = common_timer_create, diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index 070611b2c253..f32a2ebba9b8 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -6,8 +6,11 @@ struct k_clock { struct timespec64 *tp); int (*clock_set)(const clockid_t which_clock, const struct timespec64 *tp); + /* Returns the clock value in the current time namespace. */ int (*clock_get_timespec)(const clockid_t which_clock, struct timespec64 *tp); + /* Returns the clock value in the root time namespace. */ + ktime_t (*clock_get_ktime)(const clockid_t which_clock); int (*clock_adj)(const clockid_t which_clock, struct __kernel_timex *tx); int (*timer_create)(struct k_itimer *timer); int (*nsleep)(const clockid_t which_clock, int flags, -- cgit v1.2.3 From 198fa445d5c4c1a1c6c1d39f962559f8d008e79d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:26:59 +0000 Subject: posix-timers: Use clock_get_ktime() in common_timer_get() Now, when the clock_get_ktime() callback exists, the suboptimal timespec64-based conversion can be removed from common_timer_get(). Suggested-by: Thomas Gleixner Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-11-dima@arista.com --- kernel/time/posix-timers.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index a1f6b968c5d8..fe1de4f71ace 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -665,7 +665,6 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { const struct k_clock *kc = timr->kclock; ktime_t now, remaining, iv; - struct timespec64 ts64; bool sig_none; sig_none = timr->it_sigev_notify == SIGEV_NONE; @@ -683,12 +682,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) return; } - /* - * The timespec64 based conversion is suboptimal, but it's not - * worth to implement yet another callback. - */ - kc->clock_get_timespec(timr->it_clock, &ts64); - now = timespec64_to_ktime(ts64); + now = kc->clock_get_ktime(timr->it_clock); /* * When a requeue is pending or this is a SIGEV_NONE timer move the -- cgit v1.2.3 From 5a590f35add93c2bdf3ed83eee73111021679562 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:27:00 +0000 Subject: posix-clocks: Wire up clock_gettime() with timens offsets Adjust monotonic and boottime clocks with per-timens offsets. As the result a process inside time namespace will see timers and clocks corrected to offsets that were set when the namespace was created Note that applications usually go through vDSO to get time, which is not yet adjusted. Further changes will complete time namespace virtualisation with vDSO support. Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-12-dima@arista.com --- kernel/time/alarmtimer.c | 9 ++++++++- kernel/time/posix-stubs.c | 3 +++ kernel/time/posix-timers.c | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 4d8c90546635..9a8e81bc4ec2 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "posix-timers.h" @@ -886,6 +887,12 @@ static struct platform_driver alarmtimer_driver = { } }; +static void get_boottime_timespec(struct timespec64 *tp) +{ + ktime_get_boottime_ts64(tp); + timens_add_boottime(tp); +} + /** * alarmtimer_init - Initialize alarm timer code * @@ -906,7 +913,7 @@ static int __init alarmtimer_init(void) alarm_bases[ALARM_REALTIME].get_timespec = ktime_get_real_ts64, alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; alarm_bases[ALARM_BOOTTIME].get_ktime = &ktime_get_boottime; - alarm_bases[ALARM_BOOTTIME].get_timespec = ktime_get_boottime_ts64; + alarm_bases[ALARM_BOOTTIME].get_timespec = get_boottime_timespec; for (i = 0; i < ALARM_NUMTYPE; i++) { timerqueue_init_head(&alarm_bases[i].timerqueue); spin_lock_init(&alarm_bases[i].lock); diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 20c65a7d4e3a..bcbaa2045f5e 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER @@ -77,9 +78,11 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp) break; case CLOCK_MONOTONIC: ktime_get_ts64(tp); + timens_add_monotonic(tp); break; case CLOCK_BOOTTIME: ktime_get_boottime_ts64(tp); + timens_add_boottime(tp); break; default: return -EINVAL; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index fe1de4f71ace..d26b915b227a 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "timekeeping.h" #include "posix-timers.h" @@ -195,6 +196,7 @@ static int posix_clock_realtime_adj(const clockid_t which_clock, static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_ts64(tp); + timens_add_monotonic(tp); return 0; } @@ -209,6 +211,7 @@ static ktime_t posix_get_monotonic_ktime(clockid_t which_clock) static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { ktime_get_raw_ts64(tp); + timens_add_monotonic(tp); return 0; } @@ -223,6 +226,7 @@ static int posix_get_monotonic_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_ts64(tp); + timens_add_monotonic(tp); return 0; } @@ -235,6 +239,7 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 * static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp) { ktime_get_boottime_ts64(tp); + timens_add_boottime(tp); return 0; } -- cgit v1.2.3 From 89dd8eecfe961fab4924dcd14f80cf2ab2820044 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:27:01 +0000 Subject: time: Add do_timens_ktime_to_host() helper The helper subtracts namespace's clock offset from the given time and ensures that the result is within [0, KTIME_MAX]. Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-13-dima@arista.com --- kernel/time/namespace.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index c2a58e45fc4b..1a0fbaa5d2d4 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -16,6 +16,42 @@ #include #include +ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim, + struct timens_offsets *ns_offsets) +{ + ktime_t offset; + + switch (clockid) { + case CLOCK_MONOTONIC: + offset = timespec64_to_ktime(ns_offsets->monotonic); + break; + case CLOCK_BOOTTIME: + case CLOCK_BOOTTIME_ALARM: + offset = timespec64_to_ktime(ns_offsets->boottime); + break; + default: + return tim; + } + + /* + * Check that @tim value is in [offset, KTIME_MAX + offset] + * and subtract offset. + */ + if (tim < offset) { + /* + * User can specify @tim *absolute* value - if it's lesser than + * the time namespace's offset - it's already expired. + */ + tim = 0; + } else { + tim = ktime_sub(tim, offset); + if (unlikely(tim > KTIME_MAX)) + tim = KTIME_MAX; + } + + return tim; +} + static struct ucounts *inc_time_namespaces(struct user_namespace *ns) { return inc_ucount(ns, current_euid(), UCOUNT_TIME_NAMESPACES); -- cgit v1.2.3 From 7da8b3a44bb426a43670b3a97ed61085018a9d43 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:27:03 +0000 Subject: posix-timers: Make timer_settime() time namespace aware Wire timer_settime() syscall into time namespace virtualization. sys_timer_settime() calls the ktime->timer_set() callback. Right now, common_timer_set() is the only implementation for the callback. The user-supplied expiry value is converted from timespec64 to ktime and then timens_ktime_to_host() can be used to convert namespace's time to the host time. Inside a time namespace kernel's time differs by a fixed offset from a user-supplied time, but only absolute values (TIMER_ABSTIME) must be converted. Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-15-dima@arista.com --- kernel/time/posix-timers.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d26b915b227a..473082b0b57f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -885,6 +885,8 @@ int common_timer_set(struct k_itimer *timr, int flags, timr->it_interval = timespec64_to_ktime(new_setting->it_interval); expires = timespec64_to_ktime(new_setting->it_value); + if (flags & TIMER_ABSTIME) + expires = timens_ktime_to_host(timr->it_clock, expires); sigev_none = timr->it_sigev_notify == SIGEV_NONE; kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); -- cgit v1.2.3 From 0b9b9a3b162e85e620e3598f1badc45b8a177492 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:27:04 +0000 Subject: alarmtimer: Make nanosleep() time namespace aware clock_nanosleep() accepts absolute values of expiration time when the TIMER_ABSTIME flag is set. This absolute value is inside the task's time namespace and has to be converted to the host's time. Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-16-dima@arista.com --- kernel/time/alarmtimer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 9a8e81bc4ec2..b51b36e533c4 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -839,6 +839,8 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, ktime_t now = alarm_bases[type].get_ktime(); exp = ktime_add_safe(now, exp); + } else { + exp = timens_ktime_to_host(which_clock, exp); } ret = alarmtimer_do_nsleep(&alarm, exp, type); -- cgit v1.2.3 From ea2d1f7fce0f18b67f915c00c6a7a6860116bc92 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:27:05 +0000 Subject: hrtimers: Prepare hrtimer_nanosleep() for time namespaces clock_nanosleep() accepts absolute values of expiration time when TIMER_ABSTIME flag is set. This absolute value is inside the task's time namespace, and has to be converted to the host's time. There is timens_ktime_to_host() helper for converting time, but it accepts ktime argument. As a preparation, make hrtimer_nanosleep() accept a clock value in ktime instead of timespec64. Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-17-dima@arista.com --- kernel/time/hrtimer.c | 12 +++++++----- kernel/time/posix-stubs.c | 4 ++-- kernel/time/posix-timers.c | 4 +++- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8de90ea31280..d8b62f93fc8d 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1910,8 +1910,8 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) return ret; } -long hrtimer_nanosleep(const struct timespec64 *rqtp, - const enum hrtimer_mode mode, const clockid_t clockid) +long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, + const clockid_t clockid) { struct restart_block *restart; struct hrtimer_sleeper t; @@ -1923,7 +1923,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp, slack = 0; hrtimer_init_sleeper_on_stack(&t, clockid, mode); - hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); + hrtimer_set_expires_range_ns(&t.timer, rqtp, slack); ret = do_nanosleep(&t, mode); if (ret != -ERESTART_RESTARTBLOCK) goto out; @@ -1958,7 +1958,8 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; - return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC); + return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, + CLOCK_MONOTONIC); } #endif @@ -1978,7 +1979,8 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; - return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC); + return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, + CLOCK_MONOTONIC); } #endif diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index bcbaa2045f5e..5745a138f254 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -147,7 +147,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; - return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ? + return hrtimer_nanosleep(timespec64_to_ktime(t), flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } @@ -236,7 +236,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; - return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ? + return hrtimer_nanosleep(timespec64_to_ktime(t), flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 473082b0b57f..75fee6e39e5a 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1221,7 +1221,9 @@ SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, static int common_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { - return hrtimer_nanosleep(rqtp, flags & TIMER_ABSTIME ? + ktime_t texp = timespec64_to_ktime(*rqtp); + + return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } -- cgit v1.2.3 From 1f9b37bfbb607a09d838c248843e63a2cafe1080 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:27:06 +0000 Subject: posix-timers: Make clock_nanosleep() time namespace aware clock_nanosleep() accepts absolute values of expiration time, if the TIMER_ABSTIME flag is set. This value is in the tasks time namespace, which has to be converted to the host time namespace. Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-18-dima@arista.com --- kernel/time/posix-stubs.c | 12 ++++++++++-- kernel/time/posix-timers.c | 17 +++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 5745a138f254..fcb3b21d8bdc 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -129,6 +129,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, struct __kernel_timespec __user *, rmtp) { struct timespec64 t; + ktime_t texp; switch (which_clock) { case CLOCK_REALTIME: @@ -147,7 +148,10 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; - return hrtimer_nanosleep(timespec64_to_ktime(t), flags & TIMER_ABSTIME ? + texp = timespec64_to_ktime(t); + if (flags & TIMER_ABSTIME) + texp = timens_ktime_to_host(which_clock, texp); + return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } @@ -218,6 +222,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, struct old_timespec32 __user *, rmtp) { struct timespec64 t; + ktime_t texp; switch (which_clock) { case CLOCK_REALTIME: @@ -236,7 +241,10 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; - return hrtimer_nanosleep(timespec64_to_ktime(t), flags & TIMER_ABSTIME ? + texp = timespec64_to_ktime(t); + if (flags & TIMER_ABSTIME) + texp = timens_ktime_to_host(which_clock, texp); + return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 75fee6e39e5a..ff0eb30de346 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1228,6 +1228,19 @@ static int common_nsleep(const clockid_t which_clock, int flags, which_clock); } +static int common_nsleep_timens(const clockid_t which_clock, int flags, + const struct timespec64 *rqtp) +{ + ktime_t texp = timespec64_to_ktime(*rqtp); + + if (flags & TIMER_ABSTIME) + texp = timens_ktime_to_host(which_clock, texp); + + return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? + HRTIMER_MODE_ABS : HRTIMER_MODE_REL, + which_clock); +} + SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct __kernel_timespec __user *, rqtp, struct __kernel_timespec __user *, rmtp) @@ -1305,7 +1318,7 @@ static const struct k_clock clock_monotonic = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_monotonic_timespec, .clock_get_ktime = posix_get_monotonic_ktime, - .nsleep = common_nsleep, + .nsleep = common_nsleep_timens, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, @@ -1354,7 +1367,7 @@ static const struct k_clock clock_boottime = { .clock_getres = posix_get_hrtimer_res, .clock_get_ktime = posix_get_boottime_ktime, .clock_get_timespec = posix_get_boottime_timespec, - .nsleep = common_nsleep, + .nsleep = common_nsleep_timens, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, -- cgit v1.2.3 From afaa7b5ac7c87479fb5a626f87d2157af30d6401 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 12 Nov 2019 01:27:12 +0000 Subject: time: Allocate per-timens vvar page VDSO support for Time namespace needs to set up a page with the same layout as VVAR. That timens page will be placed on position of VVAR page inside namespace. That page contains time namespace clock offsets and it has vdso_data->seq set to 1 to enforce the slow path and vdso_data->clock_mode set to VCLOCK_TIMENS to enforce the time namespace handling path. Allocate the timens page during namespace creation. Setup the offsets when the first task enters the ns and freeze them to guarantee the pace of monotonic/boottime clocks and to avoid breakage of applications. The design decision is to have a global offset_lock which is used during namespace offsets setup and to freeze offsets when the first task joins the new time namespace. That is better in terms of memory usage compared to having a per namespace mutex that's used only during the setup period. Suggested-by: Andy Lutomirski Based-on-work-by: Thomas Gleixner Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-24-dima@arista.com --- kernel/time/namespace.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 1a0fbaa5d2d4..d705c15d0273 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -16,6 +16,8 @@ #include #include +#include + ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim, struct timens_offsets *ns_offsets) { @@ -90,16 +92,23 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, kref_init(&ns->kref); + ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!ns->vvar_page) + goto fail_free; + err = ns_alloc_inum(&ns->ns); if (err) - goto fail_free; + goto fail_free_page; ns->ucounts = ucounts; ns->ns.ops = &timens_operations; ns->user_ns = get_user_ns(user_ns); ns->offsets = old_ns->offsets; + ns->frozen_offsets = false; return ns; +fail_free_page: + __free_page(ns->vvar_page); fail_free: kfree(ns); fail_dec: @@ -128,6 +137,93 @@ struct time_namespace *copy_time_ns(unsigned long flags, return clone_time_ns(user_ns, old_ns); } +static struct timens_offset offset_from_ts(struct timespec64 off) +{ + struct timens_offset ret; + + ret.sec = off.tv_sec; + ret.nsec = off.tv_nsec; + + return ret; +} + +/* + * A time namespace VVAR page has the same layout as the VVAR page which + * contains the system wide VDSO data. + * + * For a normal task the VVAR pages are installed in the normal ordering: + * VVAR + * PVCLOCK + * HVCLOCK + * TIMENS <- Not really required + * + * Now for a timens task the pages are installed in the following order: + * TIMENS + * PVCLOCK + * HVCLOCK + * VVAR + * + * The check for vdso_data->clock_mode is in the unlikely path of + * the seq begin magic. So for the non-timens case most of the time + * 'seq' is even, so the branch is not taken. + * + * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check + * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the + * update to finish and for 'seq' to become even anyway. + * + * Timens page has vdso_data->clock_mode set to VCLOCK_TIMENS which enforces + * the time namespace handling path. + */ +static void timens_setup_vdso_data(struct vdso_data *vdata, + struct time_namespace *ns) +{ + struct timens_offset *offset = vdata->offset; + struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); + struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); + + vdata->seq = 1; + vdata->clock_mode = VCLOCK_TIMENS; + offset[CLOCK_MONOTONIC] = monotonic; + offset[CLOCK_MONOTONIC_RAW] = monotonic; + offset[CLOCK_MONOTONIC_COARSE] = monotonic; + offset[CLOCK_BOOTTIME] = boottime; + offset[CLOCK_BOOTTIME_ALARM] = boottime; +} + +/* + * Protects possibly multiple offsets writers racing each other + * and tasks entering the namespace. + */ +static DEFINE_MUTEX(offset_lock); + +static void timens_set_vvar_page(struct task_struct *task, + struct time_namespace *ns) +{ + struct vdso_data *vdata; + unsigned int i; + + if (ns == &init_time_ns) + return; + + /* Fast-path, taken by every task in namespace except the first. */ + if (likely(ns->frozen_offsets)) + return; + + mutex_lock(&offset_lock); + /* Nothing to-do: vvar_page has been already initialized. */ + if (ns->frozen_offsets) + goto out; + + ns->frozen_offsets = true; + vdata = arch_get_vdso_data(page_address(ns->vvar_page)); + + for (i = 0; i < CS_BASES; i++) + timens_setup_vdso_data(&vdata[i], ns); + +out: + mutex_unlock(&offset_lock); +} + void free_time_ns(struct kref *kref) { struct time_namespace *ns; @@ -136,6 +232,7 @@ void free_time_ns(struct kref *kref) dec_time_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); + __free_page(ns->vvar_page); kfree(ns); } @@ -192,6 +289,8 @@ static int timens_install(struct nsproxy *nsproxy, struct ns_common *new) !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; + timens_set_vvar_page(current, ns); + get_time_ns(ns); put_time_ns(nsproxy->time_ns); nsproxy->time_ns = ns; @@ -211,6 +310,8 @@ int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) if (nsproxy->time_ns == nsproxy->time_ns_for_children) return 0; + timens_set_vvar_page(tsk, ns); + get_time_ns(ns); put_time_ns(nsproxy->time_ns); nsproxy->time_ns = ns; @@ -246,6 +347,7 @@ struct time_namespace init_time_ns = { .user_ns = &init_user_ns, .ns.inum = PROC_TIME_INIT_INO, .ns.ops = &timens_operations, + .frozen_offsets = true, }; static int __init time_ns_init(void) -- cgit v1.2.3 From 70ddf65184ec1e8989322f35193e4fde7377f0cc Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 12 Nov 2019 01:27:15 +0000 Subject: x86/vdso: Zap vvar pages when switching to a time namespace The VVAR page layout depends on whether a task belongs to the root or non-root time namespace. Whenever a task changes its namespace, the VVAR page tables are cleared and then they will be re-faulted with a corresponding layout. Co-developed-by: Andrei Vagin Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-27-dima@arista.com --- kernel/time/namespace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index d705c15d0273..0732964803b9 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -281,6 +281,7 @@ static void timens_put(struct ns_common *ns) static int timens_install(struct nsproxy *nsproxy, struct ns_common *new) { struct time_namespace *ns = to_time_ns(new); + int err; if (!current_is_single_threaded()) return -EUSERS; @@ -291,6 +292,10 @@ static int timens_install(struct nsproxy *nsproxy, struct ns_common *new) timens_set_vvar_page(current, ns); + err = vdso_join_timens(current, ns); + if (err) + return err; + get_time_ns(ns); put_time_ns(nsproxy->time_ns); nsproxy->time_ns = ns; @@ -305,6 +310,7 @@ int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) { struct ns_common *nsc = &nsproxy->time_ns_for_children->ns; struct time_namespace *ns = to_time_ns(nsc); + int err; /* create_new_namespaces() already incremented the ref counter */ if (nsproxy->time_ns == nsproxy->time_ns_for_children) @@ -312,6 +318,10 @@ int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) timens_set_vvar_page(tsk, ns); + err = vdso_join_timens(tsk, ns); + if (err) + return err; + get_time_ns(ns); put_time_ns(nsproxy->time_ns); nsproxy->time_ns = ns; -- cgit v1.2.3 From 04a8682a71becdb639ec9c0d82b315a2baef7a5d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 12 Nov 2019 01:27:16 +0000 Subject: fs/proc: Introduce /proc/pid/timens_offsets API to set time namespace offsets for children processes, i.e.: echo "$clockid $offset_sec $offset_nsec" > /proc/self/timens_offsets Co-developed-by: Dmitry Safonov Signed-off-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191112012724.250792-28-dima@arista.com --- kernel/time/namespace.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 0732964803b9..12858507d75a 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -334,6 +335,106 @@ static struct user_namespace *timens_owner(struct ns_common *ns) return to_time_ns(ns)->user_ns; } +static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts) +{ + seq_printf(m, "%d %lld %ld\n", clockid, ts->tv_sec, ts->tv_nsec); +} + +void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m) +{ + struct ns_common *ns; + struct time_namespace *time_ns; + + ns = timens_for_children_get(p); + if (!ns) + return; + time_ns = to_time_ns(ns); + + show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic); + show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime); + put_time_ns(time_ns); +} + +int proc_timens_set_offset(struct file *file, struct task_struct *p, + struct proc_timens_offset *offsets, int noffsets) +{ + struct ns_common *ns; + struct time_namespace *time_ns; + struct timespec64 tp; + int i, err; + + ns = timens_for_children_get(p); + if (!ns) + return -ESRCH; + time_ns = to_time_ns(ns); + + if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) { + put_time_ns(time_ns); + return -EPERM; + } + + for (i = 0; i < noffsets; i++) { + struct proc_timens_offset *off = &offsets[i]; + + switch (off->clockid) { + case CLOCK_MONOTONIC: + ktime_get_ts64(&tp); + break; + case CLOCK_BOOTTIME: + ktime_get_boottime_ts64(&tp); + break; + default: + err = -EINVAL; + goto out; + } + + err = -ERANGE; + + if (off->val.tv_sec > KTIME_SEC_MAX || + off->val.tv_sec < -KTIME_SEC_MAX) + goto out; + + tp = timespec64_add(tp, off->val); + /* + * KTIME_SEC_MAX is divided by 2 to be sure that KTIME_MAX is + * still unreachable. + */ + if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2) + goto out; + } + + mutex_lock(&offset_lock); + if (time_ns->frozen_offsets) { + err = -EACCES; + goto out_unlock; + } + + err = 0; + /* Don't report errors after this line */ + for (i = 0; i < noffsets; i++) { + struct proc_timens_offset *off = &offsets[i]; + struct timespec64 *offset = NULL; + + switch (off->clockid) { + case CLOCK_MONOTONIC: + offset = &time_ns->offsets.monotonic; + break; + case CLOCK_BOOTTIME: + offset = &time_ns->offsets.boottime; + break; + } + + *offset = off->val; + } + +out_unlock: + mutex_unlock(&offset_lock); +out: + put_time_ns(time_ns); + + return err; +} + const struct proc_ns_operations timens_operations = { .name = "time", .type = CLONE_NEWTIME, -- cgit v1.2.3 From de95a991bb72e009f47e0c4bbc90fc5f594588d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Dec 2019 20:56:19 -0800 Subject: tick/sched: Annotate lockless access to last_jiffies_update syzbot (KCSAN) reported a data-race in tick_do_update_jiffies64(): BUG: KCSAN: data-race in tick_do_update_jiffies64 / tick_do_update_jiffies64 write to 0xffffffff8603d008 of 8 bytes by interrupt on cpu 1: tick_do_update_jiffies64+0x100/0x250 kernel/time/tick-sched.c:73 tick_sched_do_timer+0xd4/0xe0 kernel/time/tick-sched.c:138 tick_sched_timer+0x43/0xe0 kernel/time/tick-sched.c:1292 __run_hrtimer kernel/time/hrtimer.c:1514 [inline] __hrtimer_run_queues+0x274/0x5f0 kernel/time/hrtimer.c:1576 hrtimer_interrupt+0x22a/0x480 kernel/time/hrtimer.c:1638 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1110 [inline] smp_apic_timer_interrupt+0xdc/0x280 arch/x86/kernel/apic/apic.c:1135 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 arch_local_irq_restore arch/x86/include/asm/paravirt.h:756 [inline] kcsan_setup_watchpoint+0x1d4/0x460 kernel/kcsan/core.c:436 check_access kernel/kcsan/core.c:466 [inline] __tsan_read1 kernel/kcsan/core.c:593 [inline] __tsan_read1+0xc2/0x100 kernel/kcsan/core.c:593 kallsyms_expand_symbol.constprop.0+0x70/0x160 kernel/kallsyms.c:79 kallsyms_lookup_name+0x7f/0x120 kernel/kallsyms.c:170 insert_report_filterlist kernel/kcsan/debugfs.c:155 [inline] debugfs_write+0x14b/0x2d0 kernel/kcsan/debugfs.c:256 full_proxy_write+0xbd/0x100 fs/debugfs/file.c:225 __vfs_write+0x67/0xc0 fs/read_write.c:494 vfs_write fs/read_write.c:558 [inline] vfs_write+0x18a/0x390 fs/read_write.c:542 ksys_write+0xd5/0x1b0 fs/read_write.c:611 __do_sys_write fs/read_write.c:623 [inline] __se_sys_write fs/read_write.c:620 [inline] __x64_sys_write+0x4c/0x60 fs/read_write.c:620 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 read to 0xffffffff8603d008 of 8 bytes by task 0 on cpu 0: tick_do_update_jiffies64+0x2b/0x250 kernel/time/tick-sched.c:62 tick_nohz_update_jiffies kernel/time/tick-sched.c:505 [inline] tick_nohz_irq_enter kernel/time/tick-sched.c:1257 [inline] tick_irq_enter+0x139/0x1c0 kernel/time/tick-sched.c:1274 irq_enter+0x4f/0x60 kernel/softirq.c:354 entering_irq arch/x86/include/asm/apic.h:517 [inline] entering_ack_irq arch/x86/include/asm/apic.h:523 [inline] smp_apic_timer_interrupt+0x55/0x280 arch/x86/kernel/apic/apic.c:1133 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:60 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:571 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x1af/0x280 kernel/sched/idle.c:263 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355 rest_init+0xec/0xf6 init/main.c:452 arch_call_rest_init+0x17/0x37 start_kernel+0x838/0x85e init/main.c:786 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc7+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Use READ_ONCE() and WRITE_ONCE() to annotate this expected race. Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20191205045619.204946-1-edumazet@google.com --- kernel/time/tick-sched.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 8b192e67aabc..a792d21cac64 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -58,8 +58,9 @@ static void tick_do_update_jiffies64(ktime_t now) /* * Do a quick check without holding jiffies_lock: + * The READ_ONCE() pairs with two updates done later in this function. */ - delta = ktime_sub(now, last_jiffies_update); + delta = ktime_sub(now, READ_ONCE(last_jiffies_update)); if (delta < tick_period) return; @@ -70,8 +71,9 @@ static void tick_do_update_jiffies64(ktime_t now) if (delta >= tick_period) { delta = ktime_sub(delta, tick_period); - last_jiffies_update = ktime_add(last_jiffies_update, - tick_period); + /* Pairs with the lockless read in this function. */ + WRITE_ONCE(last_jiffies_update, + ktime_add(last_jiffies_update, tick_period)); /* Slow path for long timeouts */ if (unlikely(delta >= tick_period)) { @@ -79,8 +81,10 @@ static void tick_do_update_jiffies64(ktime_t now) ticks = ktime_divns(delta, incr); - last_jiffies_update = ktime_add_ns(last_jiffies_update, - incr * ticks); + /* Pairs with the lockless read in this function. */ + WRITE_ONCE(last_jiffies_update, + ktime_add_ns(last_jiffies_update, + incr * ticks)); } do_timer(++ticks); -- cgit v1.2.3 From 6b6d188aae79a630957aefd88ff5c42af6553ee3 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 9 Jan 2020 07:59:07 -0800 Subject: alarmtimer: Unregister wakeup source when module get fails The alarmtimer_rtc_add_device() function creates a wakeup source and then tries to grab a module reference. If that fails the function returns early with an error code, but fails to remove the wakeup source. Cleanup this exit path so there is no dangling wakeup source, which is named 'alarmtime' left allocated which will conflict with another RTC device that may be registered later. Fixes: 51218298a25e ("alarmtimer: Ensure RTC module is not unloaded") Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Reviewed-by: Douglas Anderson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200109155910.907-2-swboyd@chromium.org --- kernel/time/alarmtimer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index b51b36e533c4..9dc7a0913190 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -91,6 +91,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); struct wakeup_source *__ws; + int ret = 0; if (rtcdev) return -EBUSY; @@ -105,8 +106,8 @@ static int alarmtimer_rtc_add_device(struct device *dev, spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { if (!try_module_get(rtc->owner)) { - spin_unlock_irqrestore(&rtcdev_lock, flags); - return -1; + ret = -1; + goto unlock; } rtcdev = rtc; @@ -115,11 +116,12 @@ static int alarmtimer_rtc_add_device(struct device *dev, ws = __ws; __ws = NULL; } +unlock: spin_unlock_irqrestore(&rtcdev_lock, flags); wakeup_source_unregister(__ws); - return 0; + return ret; } static inline void alarmtimer_rtc_timer_init(void) -- cgit v1.2.3 From 5167c506d62dd9ffab73eba23c79b0a8845c9fe1 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 10 Jan 2020 16:39:02 +0800 Subject: tick/common: Touch watchdog in tick_unfreeze() on all CPUs Suspend to IDLE invokes tick_unfreeze() on resume. tick_unfreeze() on the first resuming CPU resumes timekeeping, which also has the side effect of resetting the softlockup watchdog on this CPU. But on the secondary CPUs the watchdog is not reset in the resume / unfreeze() path, which can result in false softlockup warnings on those CPUs depending on the time spent in suspend. Prevent this by clearing the softlock watchdog in the unfreeze path also on the secondary resuming CPUs. [ tglx: Massaged changelog ] Signed-off-by: Chunyan Zhang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200110083902.27276-1-chunyan.zhang@unisoc.com --- kernel/time/tick-common.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 59225b484e4e..7e5d3524e924 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -558,6 +559,7 @@ void tick_unfreeze(void) trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); } else { + touch_softlockup_watchdog(); tick_resume_local(); } -- cgit v1.2.3 From 9a6b55ac4a44060bcb782baf002859b2a2c63267 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Jan 2020 19:52:38 +0100 Subject: lib/vdso: Make __arch_update_vdso_data() logic understandable The function name suggests that this is a boolean checking whether the architecture asks for an update of the VDSO data, but it works the other way round. To spare further confusion invert the logic. Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation") Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200114185946.656652824@linutronix.de --- kernel/time/vsyscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 5ee0f7709410..f0aab6182824 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -84,7 +84,7 @@ void update_vsyscall(struct timekeeper *tk) struct vdso_timestamp *vdso_ts; u64 nsec; - if (__arch_update_vdso_data()) { + if (!__arch_update_vdso_data()) { /* * Some architectures might want to skip the update of the * data page. -- cgit v1.2.3 From 9f24c540f7f8eb3a981528da9a9a636a5bdf5987 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Jan 2020 19:52:39 +0100 Subject: lib/vdso: Update coarse timekeeper unconditionally The low resolution parts of the VDSO, i.e.: clock_gettime(CLOCK_*_COARSE), clock_getres(), time() can be used even if there is no VDSO capable clocksource. But if an architecture opts out of the VDSO data update then this information becomes stale. This affects ARM when there is no architected timer available. The lack of update causes userspace to use stale data forever. Make the update of the low resolution parts unconditional and only skip the update of the high resolution parts if the architecture requests it. Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation") Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200114185946.765577901@linutronix.de --- kernel/time/vsyscall.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index f0aab6182824..9577c89179cd 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -28,11 +28,6 @@ static inline void update_vdso_data(struct vdso_data *vdata, vdata[CS_RAW].mult = tk->tkr_raw.mult; vdata[CS_RAW].shift = tk->tkr_raw.shift; - /* CLOCK_REALTIME */ - vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; - vdso_ts->sec = tk->xtime_sec; - vdso_ts->nsec = tk->tkr_mono.xtime_nsec; - /* CLOCK_MONOTONIC */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; @@ -70,12 +65,6 @@ static inline void update_vdso_data(struct vdso_data *vdata, vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; vdso_ts->nsec = tk->tkr_mono.xtime_nsec; - - /* - * Read without the seqlock held by clock_getres(). - * Note: No need to have a second copy. - */ - WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); } void update_vsyscall(struct timekeeper *tk) @@ -84,20 +73,17 @@ void update_vsyscall(struct timekeeper *tk) struct vdso_timestamp *vdso_ts; u64 nsec; - if (!__arch_update_vdso_data()) { - /* - * Some architectures might want to skip the update of the - * data page. - */ - return; - } - /* copy vsyscall data */ vdso_write_begin(vdata); vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk); vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk); + /* CLOCK_REALTIME also required for time() */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; + vdso_ts->sec = tk->xtime_sec; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec; + /* CLOCK_REALTIME_COARSE */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; vdso_ts->sec = tk->xtime_sec; @@ -110,7 +96,18 @@ void update_vsyscall(struct timekeeper *tk) nsec = nsec + tk->wall_to_monotonic.tv_nsec; vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); - update_vdso_data(vdata, tk); + /* + * Read without the seqlock held by clock_getres(). + * Note: No need to have a second copy. + */ + WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); + + /* + * Architectures can opt out of updating the high resolution part + * of the VDSO. + */ + if (__arch_update_vdso_data()) + update_vdso_data(vdata, tk); __arch_update_vsyscall(vdata, tk); -- cgit v1.2.3 From eb5a4d0a9ee976008d1add75e3d64545399e80a3 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Mon, 20 Jan 2020 22:43:47 +0000 Subject: hrtimer: Add missing sparse annotation for __run_timer() Sparse reports a warning at __run_hrtimer() |warning: context imbalance in __run_hrtimer() - unexpected unlock Add the missing must_hold() annotation. Signed-off-by: Jules Irenge Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200120224347.51843-1-jbi.octave@gmail.com --- kernel/time/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d8b62f93fc8d..3a609e7344f3 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1477,7 +1477,7 @@ EXPORT_SYMBOL_GPL(hrtimer_active); static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_clock_base *base, struct hrtimer *timer, ktime_t *now, - unsigned long flags) + unsigned long flags) __must_hold(&cpu_base->lock) { enum hrtimer_restart (*fn)(struct hrtimer *); int restart; -- cgit v1.2.3 From 6b088cefbeaa87ba48bf838edfc1e19c9ff3976b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 23 Jan 2020 21:58:49 -0800 Subject: alarmtimer: Update alarmtimer_get_rtcdev() docs to reflect reality This function doesn't do anything like this comment says when an RTC device hasn't been chosen. It looks like we used to do something like that before commit 8bc0dafb5cf3 ("alarmtimers: Rework RTC device selection using class interface") but that's long gone now. Remove this sentence to avoid confusing the reader. Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200124055849.154411-5-swboyd@chromium.org --- kernel/time/alarmtimer.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 9dc7a0913190..564ff5df2b0b 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -69,8 +69,6 @@ static DEFINE_SPINLOCK(rtcdev_lock); * alarmtimer_get_rtcdev - Return selected rtcdevice * * This function returns the rtc device to use for wakealarms. - * If one has not already been chosen, it checks to see if a - * functional rtc device is available. */ struct rtc_device *alarmtimer_get_rtcdev(void) { -- cgit v1.2.3 From c79108bd19a8490315847e0c95ac6526fcd8e770 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 23 Jan 2020 21:58:46 -0800 Subject: alarmtimer: Make alarmtimer platform device child of RTC device The alarmtimer_suspend() function will fail if an RTC device is on a bus such as SPI or i2c and that RTC device registers and probes after alarmtimer_init() registers and probes the 'alarmtimer' platform device. This is because system wide suspend suspends devices in the reverse order of their probe. When alarmtimer_suspend() attempts to program the RTC for a wakeup it will try to program an RTC device on a bus that has already been suspended. Move the alarmtimer device registration to happen when the RTC which is used for wakeup is registered. Register the 'alarmtimer' platform device as a child of the RTC device too, so that it can be guaranteed that the RTC device won't be suspended when alarmtimer_suspend() is called. Reported-by: Douglas Anderson Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20200124055849.154411-2-swboyd@chromium.org --- kernel/time/alarmtimer.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 564ff5df2b0b..f0469ccc84ee 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -89,6 +89,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); struct wakeup_source *__ws; + struct platform_device *pdev; int ret = 0; if (rtcdev) @@ -100,9 +101,11 @@ static int alarmtimer_rtc_add_device(struct device *dev, return -1; __ws = wakeup_source_register(dev, "alarmtimer"); + pdev = platform_device_register_data(dev, "alarmtimer", + PLATFORM_DEVID_AUTO, NULL, 0); spin_lock_irqsave(&rtcdev_lock, flags); - if (!rtcdev) { + if (__ws && !IS_ERR(pdev) && !rtcdev) { if (!try_module_get(rtc->owner)) { ret = -1; goto unlock; @@ -113,10 +116,14 @@ static int alarmtimer_rtc_add_device(struct device *dev, get_device(dev); ws = __ws; __ws = NULL; + pdev = NULL; + } else { + ret = -1; } unlock: spin_unlock_irqrestore(&rtcdev_lock, flags); + platform_device_unregister(pdev); wakeup_source_unregister(__ws); return ret; @@ -903,8 +910,7 @@ static void get_boottime_timespec(struct timespec64 *tp) */ static int __init alarmtimer_init(void) { - struct platform_device *pdev; - int error = 0; + int error; int i; alarmtimer_rtc_timer_init(); @@ -929,15 +935,7 @@ static int __init alarmtimer_init(void) if (error) goto out_if; - pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0); - if (IS_ERR(pdev)) { - error = PTR_ERR(pdev); - goto out_drv; - } return 0; - -out_drv: - platform_driver_unregister(&alarmtimer_driver); out_if: alarmtimer_rtc_interface_remove(); return error; -- cgit v1.2.3 From 7c94caca877b0feeca6f5f7b07d48c508e20d58f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 23 Jan 2020 21:58:47 -0800 Subject: alarmtimer: Use wakeup source from alarmtimer platform device Use the wakeup source that can be associated with the 'alarmtimer' platform device instead of registering another one by hand. Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20200124055849.154411-3-swboyd@chromium.org --- kernel/time/alarmtimer.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index f0469ccc84ee..685ff57a1d87 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -58,8 +58,6 @@ static DEFINE_SPINLOCK(freezer_delta_lock); #endif #ifdef CONFIG_RTC_CLASS -static struct wakeup_source *ws; - /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; @@ -88,7 +86,6 @@ static int alarmtimer_rtc_add_device(struct device *dev, { unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); - struct wakeup_source *__ws; struct platform_device *pdev; int ret = 0; @@ -100,12 +97,13 @@ static int alarmtimer_rtc_add_device(struct device *dev, if (!device_may_wakeup(rtc->dev.parent)) return -1; - __ws = wakeup_source_register(dev, "alarmtimer"); pdev = platform_device_register_data(dev, "alarmtimer", PLATFORM_DEVID_AUTO, NULL, 0); + if (!IS_ERR(pdev)) + device_init_wakeup(&pdev->dev, true); spin_lock_irqsave(&rtcdev_lock, flags); - if (__ws && !IS_ERR(pdev) && !rtcdev) { + if (!IS_ERR(pdev) && !rtcdev) { if (!try_module_get(rtc->owner)) { ret = -1; goto unlock; @@ -114,8 +112,6 @@ static int alarmtimer_rtc_add_device(struct device *dev, rtcdev = rtc; /* hold a reference so it doesn't go away */ get_device(dev); - ws = __ws; - __ws = NULL; pdev = NULL; } else { ret = -1; @@ -124,7 +120,6 @@ unlock: spin_unlock_irqrestore(&rtcdev_lock, flags); platform_device_unregister(pdev); - wakeup_source_unregister(__ws); return ret; } @@ -291,7 +286,7 @@ static int alarmtimer_suspend(struct device *dev) return 0; if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { - __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); + pm_wakeup_event(dev, 2 * MSEC_PER_SEC); return -EBUSY; } @@ -306,7 +301,7 @@ static int alarmtimer_suspend(struct device *dev) /* Set alarm, if in the past reject suspend briefly to handle */ ret = rtc_timer_start(rtc, &rtctimer, now, 0); if (ret < 0) - __pm_wakeup_event(ws, MSEC_PER_SEC); + pm_wakeup_event(dev, MSEC_PER_SEC); return ret; } -- cgit v1.2.3 From fd928f3e32ba09381b287f8b732418434d932855 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 23 Jan 2020 21:58:48 -0800 Subject: alarmtimer: Make alarmtimer_get_rtcdev() a stub when CONFIG_RTC_CLASS=n The stubbed version of alarmtimer_get_rtcdev() is not exported. so this won't work if this function is used in a module when CONFIG_RTC_CLASS=n. Move the stub function to the header file and make it inline so that callers don't have to worry about linking against this symbol. rtcdev isn't used outside of this ifdef so it's not required to be redefined to NULL. Drop that while touching this area. Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200124055849.154411-4-swboyd@chromium.org --- kernel/time/alarmtimer.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 685ff57a1d87..2ffb466af77e 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -143,11 +143,6 @@ static void alarmtimer_rtc_interface_remove(void) class_interface_unregister(&alarmtimer_rtc_interface); } #else -struct rtc_device *alarmtimer_get_rtcdev(void) -{ - return NULL; -} -#define rtcdev (NULL) static inline int alarmtimer_rtc_interface_setup(void) { return 0; } static inline void alarmtimer_rtc_interface_remove(void) { } static inline void alarmtimer_rtc_timer_init(void) { } -- cgit v1.2.3 From febac332a819f0e764aa4da62757ba21d18c182b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 31 Jan 2020 19:08:59 +0300 Subject: clocksource: Prevent double add_timer_on() for watchdog_timer Kernel crashes inside QEMU/KVM are observed: kernel BUG at kernel/time/timer.c:1154! BUG_ON(timer_pending(timer) || !timer->function) in add_timer_on(). At the same time another cpu got: general protection fault: 0000 [#1] SMP PTI of poinson pointer 0xdead000000000200 in: __hlist_del at include/linux/list.h:681 (inlined by) detach_timer at kernel/time/timer.c:818 (inlined by) expire_timers at kernel/time/timer.c:1355 (inlined by) __run_timers at kernel/time/timer.c:1686 (inlined by) run_timer_softirq at kernel/time/timer.c:1699 Unfortunately kernel logs are badly scrambled, stacktraces are lost. Printing the timer->function before the BUG_ON() pointed to clocksource_watchdog(). The execution of clocksource_watchdog() can race with a sequence of clocksource_stop_watchdog() .. clocksource_start_watchdog(): expire_timers() detach_timer(timer, true); timer->entry.pprev = NULL; raw_spin_unlock_irq(&base->lock); call_timer_fn clocksource_watchdog() clocksource_watchdog_kthread() or clocksource_unbind() spin_lock_irqsave(&watchdog_lock, flags); clocksource_stop_watchdog(); del_timer(&watchdog_timer); watchdog_running = 0; spin_unlock_irqrestore(&watchdog_lock, flags); spin_lock_irqsave(&watchdog_lock, flags); clocksource_start_watchdog(); add_timer_on(&watchdog_timer, ...); watchdog_running = 1; spin_unlock_irqrestore(&watchdog_lock, flags); spin_lock(&watchdog_lock); add_timer_on(&watchdog_timer, ...); BUG_ON(timer_pending(timer) || !timer->function); timer_pending() -> true BUG() I.e. inside clocksource_watchdog() watchdog_timer could be already armed. Check timer_pending() before calling add_timer_on(). This is sufficient as all operations are synchronized by watchdog_lock. Fixes: 75c5158f70c0 ("timekeeping: Update clocksource with stop_machine") Signed-off-by: Konstantin Khlebnikov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/158048693917.4378.13823603769948933793.stgit@buzz --- kernel/time/clocksource.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index fff5f64981c6..428beb69426a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused) next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); + + /* + * Arm timer if not already pending: could race with concurrent + * pair clocksource_stop_watchdog() clocksource_start_watchdog(). + */ + if (!timer_pending(&watchdog_timer)) { + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); + } out: spin_unlock(&watchdog_lock); } -- cgit v1.2.3 From 412c53a680a97cb1ae2c0ab60230e193bee86387 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Feb 2020 20:03:54 -0800 Subject: y2038: remove unused time32 interfaces No users remain, so kill these off before we grow new ones. Link: http://lkml.kernel.org/r/20200110154232.4104492-3-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Thomas Gleixner Cc: Deepa Dinamani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/time.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/time.c b/kernel/time/time.c index cdd7386115ff..3985b2b32d08 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -449,49 +449,6 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0, } EXPORT_SYMBOL(mktime64); -/** - * ns_to_timespec - Convert nanoseconds to timespec - * @nsec: the nanoseconds value to be converted - * - * Returns the timespec representation of the nsec parameter. - */ -struct timespec ns_to_timespec(const s64 nsec) -{ - struct timespec ts; - s32 rem; - - if (!nsec) - return (struct timespec) {0, 0}; - - ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem); - if (unlikely(rem < 0)) { - ts.tv_sec--; - rem += NSEC_PER_SEC; - } - ts.tv_nsec = rem; - - return ts; -} -EXPORT_SYMBOL(ns_to_timespec); - -/** - * ns_to_timeval - Convert nanoseconds to timeval - * @nsec: the nanoseconds value to be converted - * - * Returns the timeval representation of the nsec parameter. - */ -struct timeval ns_to_timeval(const s64 nsec) -{ - struct timespec ts = ns_to_timespec(nsec); - struct timeval tv; - - tv.tv_sec = ts.tv_sec; - tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000; - - return tv; -} -EXPORT_SYMBOL(ns_to_timeval); - struct __kernel_old_timeval ns_to_kernel_old_timeval(const s64 nsec) { struct timespec64 ts = ns_to_timespec64(nsec); -- cgit v1.2.3