From f872f5400cc01373d8e29d9c7a5296ccfaf4ccf3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 29 Dec 2015 20:12:19 -0800 Subject: mm: Add a vm_special_mapping.fault() method Requiring special mappings to give a list of struct pages is inflexible: it prevents sane use of IO memory in a special mapping, it's inefficient (it requires arch code to initialize a list of struct pages, and it requires the mm core to walk the entire list just to figure out how long it is), and it prevents arch code from doing anything fancy when a special mapping fault occurs. Add a .fault method as an alternative to filling in a .pages array. Looks-OK-to: Andrew Morton Signed-off-by: Andy Lutomirski Reviewed-by: Kees Cook Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a26d1677c0bc7e774c33f469451a78ca31e9e6af.1451446564.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f8d1492a114f..c88e48a3c155 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -568,10 +568,26 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm) } #endif -struct vm_special_mapping -{ - const char *name; +struct vm_fault; + +struct vm_special_mapping { + const char *name; /* The name, e.g. "[vdso]". */ + + /* + * If .fault is not provided, this points to a + * NULL-terminated array of pages that back the special mapping. + * + * This must not be NULL unless .fault is provided. + */ struct page **pages; + + /* + * If non-NULL, then this is called to resolve page faults + * on the special mapping. If used, .pages is not checked. + */ + int (*fault)(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, + struct vm_fault *vmf); }; enum tlb_flush_reason { -- cgit v1.2.3 From 1745cbc5d0dee0749a6bc0ea8e872c5db0074061 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 29 Dec 2015 20:12:20 -0800 Subject: mm: Add vm_insert_pfn_prot() The x86 vvar vma contains pages with differing cacheability flags. x86 currently implements this by manually inserting all the ptes using (io_)remap_pfn_range when the vma is set up. x86 wants to move to using .fault with VM_FAULT_NOPAGE to set up the mappings as needed. The correct API to use to insert a pfn in .fault is vm_insert_pfn(), but vm_insert_pfn() can't override the vma's cache mode, and the HPET page in particular needs to be uncached despite the fact that the rest of the VMA is cached. Add vm_insert_pfn_prot() to support varying cacheability within the same non-COW VMA in a more sane manner. x86 could alternatively use multiple VMAs, but that's messy, would break CRIU, and would create unnecessary VMAs that would waste memory. Signed-off-by: Andy Lutomirski Reviewed-by: Kees Cook Acked-by: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d2938d1eb37be7a5e4f86182db646551f11e45aa.1451446564.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 00bad7793788..87ef1d7730ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2080,6 +2080,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr, int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); -- cgit v1.2.3 From dd42ac8f02aea32661756554aace2095f7181d34 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Fri, 16 Oct 2015 15:20:53 +0600 Subject: clockevents: Rename last parameter of clocks_calc_mult_shift() to maxsec Last parameter of the clocks_calc_mult_shift() was renamed from minsec to maxsec in the 5fdade95 (time: Rename misnamed minsec argument of clocks_calc_mult_shift()). Signed-off-by: Alexander Kuleshov Link: http://lkml.kernel.org/r/1444987253-11018-1-git-send-email-kuleshovmail@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index bdcf358dfce2..0d442e34c349 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -190,9 +190,9 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void -clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) +clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec) { - return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec); + return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec); } extern void clockevents_suspend(void); -- cgit v1.2.3 From 9c808765e88efb6fa6af7e2206ef89512f1840a7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 15 Jan 2016 17:41:08 +0000 Subject: hrtimer: Add support for CLOCK_MONOTONIC_RAW The KVM/ARM timer implementation arms a hrtimer when a vcpu is blocked (usually because it is waiting for an interrupt) while its timer is going to kick in the future. It is essential that this timer doesn't get adjusted, or the guest will end up being woken-up at the wrong time (NTP running on the host seems to confuse the hell out of some guests). In order to allow this, let's add CLOCK_MONOTONIC_RAW support to hrtimer (it is so far only supported for posix timers). It also has the (limited) benefit of fixing de0421d53bfb ("mac80211_hwsim: shuffle code to prepare for dynamic radios"), which already uses this functionnality without realizing wasn't implemented (just being lucky...). Signed-off-by: Marc Zyngier Cc: Tomasz Nowicki Cc: Christoffer Dall Link: http://lkml.kernel.org/r/1452879670-16133-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 76dd4f0da5ca..a6d64af5e73f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -151,6 +151,7 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, + HRTIMER_BASE_MONOTONIC_RAW, HRTIMER_MAX_CLOCK_BASES, }; -- cgit v1.2.3 From ad315455d396a1cbcb2f9fdd687b7e1b26b789e7 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 29 Dec 2015 12:18:46 +0800 Subject: sparse: Add __private to privatize members of structs In C programming language, we don't have a easy way to privatize a member of a structure. However in kernel, sometimes there is a need to privatize a member in case of potential bugs or misuses. Fortunately, the noderef attribute of sparse is a way to privatize a member, as by defining a member as noderef, the address-of operator on the member will produce a noderef pointer to that member, and if anyone wants to dereference that kind of pointers to read or modify the member, sparse will yell. Based on this, __private modifier and related operation ACCESS_PRIVATE() are introduced, which could help detect undesigned public uses of private members of structs. Here is an example of sparse's output if it detect an undersigned public use: | kernel/rcu/tree.c:4453:25: warning: incorrect type in argument 1 (different modifiers) | kernel/rcu/tree.c:4453:25: expected struct raw_spinlock [usertype] *lock | kernel/rcu/tree.c:4453:25: got struct raw_spinlock [noderef] * Also, this patch improves compiler.h a little bit by adding comments for "#else" and "#endif". Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- include/linux/compiler.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 00b042c49ccd..c845356952bb 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -20,12 +20,14 @@ # define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) -#else +#else /* CONFIG_SPARSE_RCU_POINTER */ # define __rcu -#endif +#endif /* CONFIG_SPARSE_RCU_POINTER */ +# define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); -#else +# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) +#else /* __CHECKER__ */ # define __user # define __kernel # define __safe @@ -44,7 +46,9 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __percpu # define __rcu # define __pmem -#endif +# define __private +# define ACCESS_PRIVATE(p, member) ((p)->member) +#endif /* __CHECKER__ */ /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ #define ___PASTE(a,b) a##b -- cgit v1.2.3 From b354286effa52da6cb1b1f16604d41ff81b8c445 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 29 Dec 2015 12:18:48 +0800 Subject: irq: Privatize irq_common_data::state_use_accessors irq_common_data::state_use_accessors is not designed for public use. Therefore make it private so that people who write code accessing it directly will get blamed by sparse. Also #undef the macro __irqd_to_state after used in header files, so that the macro can't be misused. Signed-off-by: Boqun Feng Reviewed-by: Thomas Gleixner Signed-off-by: Paul E. McKenney --- include/linux/irq.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 3c1c96786248..cd14cd4a22b4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -137,7 +137,7 @@ struct irq_domain; * @msi_desc: MSI descriptor */ struct irq_common_data { - unsigned int state_use_accessors; + unsigned int __private state_use_accessors; #ifdef CONFIG_NUMA unsigned int node; #endif @@ -208,7 +208,7 @@ enum { IRQD_FORWARDED_TO_VCPU = (1 << 20), }; -#define __irqd_to_state(d) ((d)->common->state_use_accessors) +#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) static inline bool irqd_is_setaffinity_pending(struct irq_data *d) { @@ -299,6 +299,8 @@ static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; } +#undef __irqd_to_state + static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) { return d->hwirq; -- cgit v1.2.3 From 9de630c4f264dec48e61edd871cb98b8e1b58250 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 Jan 2016 07:43:50 -0800 Subject: rcu: Document unique-name limitation for DEFINE_STATIC_SRCU() SRCU uses per-CPU variables, and DEFINE_STATIC_SRCU() uses a static per-CPU variable. However, per-CPU variables have significant restrictions, for example, names of per-CPU variables must be globally unique, even if declared static. These restrictions carry over to DEFINE_STATIC_SRCU(), and this commit therefore documents these restrictions. Reported-by: Stephen Rothwell Reported-by: kbuild test robot Suggested-by: Boqun Feng Signed-off-by: Paul E. McKenney Reviewed-by: Tejun Heo --- include/linux/srcu.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f5f80c5643ac..dc8eb63c6568 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -99,8 +99,23 @@ void process_srcu(struct work_struct *work); } /* - * define and init a srcu struct at build time. - * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it. + * Define and initialize a srcu struct at build time. + * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. + * + * Note that although DEFINE_STATIC_SRCU() hides the name from other + * files, the per-CPU variable rules nevertheless require that the + * chosen name be globally unique. These rules also prohibit use of + * DEFINE_STATIC_SRCU() within a function. If these rules are too + * restrictive, declare the srcu_struct manually. For example, in + * each file: + * + * static struct srcu_struct my_srcu; + * + * Then, before the first use of each my_srcu, manually initialize it: + * + * init_srcu_struct(&my_srcu); + * + * See include/linux/percpu-defs.h for the rules on per-CPU variables. */ #define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ -- cgit v1.2.3 From 3500efae4410454522697c94c23fc40323c0cee9 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 19 Oct 2015 14:45:02 -0700 Subject: rcu: Remove rcu_user_hooks_switch Because there are neither uses nor intended uses for the rcu_user_hooks_switch() function that was orginally intended for nohz use, this commit removes it. Signed-off-by: Yang Shi Acked-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 14e6f47ee16f..b5d48bd56e3f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -360,8 +360,6 @@ void rcu_user_exit(void); #else static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } -static inline void rcu_user_hooks_switch(struct task_struct *prev, - struct task_struct *next) { } #endif /* CONFIG_NO_HZ_FULL */ #ifdef CONFIG_RCU_NOCB_CPU -- cgit v1.2.3 From 0abefbaab4edbcec637e00fefcdeccb52797fe4f Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:12 +0000 Subject: genirq: Add new IPI irqdomain flags These flags will be used to identify an IPI domain. We have two flavours of IPI implementations: IRQ_DOMAIN_FLAG_IPI_PER_CPU: Each CPU has its own virq and hwirq IRQ_DOMAIN_FLAG_IPI_SINGLE : A single virq and hwirq for all CPUs Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-2-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 04579d9fbce4..9bb0a9cfc1c4 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -172,6 +172,12 @@ enum { /* Core calls alloc/free recursive through the domain hierarchy. */ IRQ_DOMAIN_FLAG_AUTO_RECURSIVE = (1 << 1), + /* Irq domain is an IPI domain with virq per cpu */ + IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), + + /* Irq domain is an IPI domain with single virq */ + IRQ_DOMAIN_FLAG_IPI_SINGLE = (1 << 3), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -400,6 +406,22 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY; } + +static inline bool irq_domain_is_ipi(struct irq_domain *domain) +{ + return domain->flags & + (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); +} + +static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_IPI_PER_CPU; +} + +static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE; +} #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline void irq_domain_activate_irq(struct irq_data *data) { } static inline void irq_domain_deactivate_irq(struct irq_data *data) { } @@ -413,6 +435,21 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return false; } + +static inline bool irq_domain_is_ipi(struct irq_domain *domain) +{ + return false; +} + +static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) +{ + return false; +} + +static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) +{ + return false; +} #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* CONFIG_IRQ_DOMAIN */ -- cgit v1.2.3 From 29d5c8db26ad54592436508819ac617119306f96 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:13 +0000 Subject: genirq: Add DOMAIN_BUS_IPI We need a way to search and match IPI domains. Using the new enum we can use irq_find_matching_host() to do that. Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-3-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 9bb0a9cfc1c4..130e1c3117c3 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -74,6 +74,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_PCI_MSI, DOMAIN_BUS_PLATFORM_MSI, DOMAIN_BUS_NEXUS, + DOMAIN_BUS_IPI, }; /** -- cgit v1.2.3 From 955bfe5912e7839abcc83694f06867535487404b Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:17 +0000 Subject: genirq: Add an extra comment about the use of affinity in irq_common_data Affinity will have dual meaning depends on the type of the irq. If it is a normal irq, it'll have the standard affinity meaning. If it is an IPI, it will hold the mask of the cpus to which an IPI can be sent. Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-7-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 3c1c96786248..0817afd0d719 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -133,7 +133,9 @@ struct irq_domain; * Use accessor functions to deal with it * @node: node index useful for balancing * @handler_data: per-IRQ data for the irq_chip methods - * @affinity: IRQ affinity on SMP + * @affinity: IRQ affinity on SMP. If this is an IPI + * related irq, then this is the mask of the + * CPUs to which an IPI can be sent. * @msi_desc: MSI descriptor */ struct irq_common_data { -- cgit v1.2.3 From f256c9a0c54820ffef21b126f8226be2bece3dd7 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:16 +0000 Subject: genirq: Add ipi_offset to irq_common_data IPIs are always assumed to be consecutively allocated, hence virqs and hwirqs can be inferred by using CPU id as an offset. But the first cpu doesn't always have to start at offset 0. ipi_offset stores the position of the first cpu so that we can easily calculate the virq or hwirq of an IPI associated with a specific cpu. Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-6-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 0817afd0d719..a32b47fbf874 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -137,6 +137,7 @@ struct irq_domain; * related irq, then this is the mask of the * CPUs to which an IPI can be sent. * @msi_desc: MSI descriptor + * @ipi_offset: Offset of first IPI target cpu in @affinity. Optional. */ struct irq_common_data { unsigned int state_use_accessors; @@ -146,6 +147,9 @@ struct irq_common_data { void *handler_data; struct msi_desc *msi_desc; cpumask_var_t affinity; +#ifdef CONFIG_GENERIC_IRQ_IPI + unsigned int ipi_offset; +#endif }; /** -- cgit v1.2.3 From ac0a0cd266d1f21236d5975ca6bced9b377a2a6a Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:18 +0000 Subject: genirq: Make irq_domain_alloc_descs() non static We will need to use this function to implement irq_reserve_ipi() later. So make it non static and move the prototype to irqdomain.h to allow using it outside irqdomain.c Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-8-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 130e1c3117c3..c466cc17b8e9 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -213,6 +213,8 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); +extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, + irq_hw_number_t hwirq, int node); static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) { -- cgit v1.2.3 From d17bf24e695290d3fe7943aca52ab48098a10653 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:19 +0000 Subject: genirq: Add a new generic IPI reservation code to irq core Add a generic mechanism to dynamically allocate an IPI. Depending on the underlying implementation this creates either a single Linux irq or a consective range of Linux irqs. The Linux irq is used later to send IPIs to other CPUs. [ tglx: Massaged the code and removed the 'consecutive mask' restriction for the single IRQ case ] Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-9-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 3 +++ include/linux/irqdomain.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index a32b47fbf874..95f4f66f95f3 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -940,4 +940,7 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc, return readl(gc->reg_base + reg_offset); } +/* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */ +#define INVALID_HWIRQ (~0UL) + #endif /* _LINUX_IRQ_H */ diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index c466cc17b8e9..ed48594e96d2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -344,6 +344,11 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); +/* IPI functions */ +unsigned int irq_reserve_ipi(struct irq_domain *domain, + const struct cpumask *dest); +void irq_destroy_ipi(unsigned int irq); + /* V2 interfaces to support hierarchy IRQ domains. */ extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); -- cgit v1.2.3 From f9bce791ae2a1a10a965b30427f5507c1a77669f Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:20 +0000 Subject: genirq: Add a new function to get IPI reverse mapping When dealing with coprocessors we need to find out the actual hwirqs values to pass on to the firmware so that it knows what it needs to use to receive IPIs from and send IPIs to Linux cpus. [ tglx: Fixed the single hwirq IPI case. The hardware irq number does not change due to the cpu number ] Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-10-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 95f4f66f95f3..10273dce058a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -942,5 +942,6 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc, /* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */ #define INVALID_HWIRQ (~0UL) +irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); #endif /* _LINUX_IRQ_H */ -- cgit v1.2.3 From 34dc1ae101018dbb50e1d04e88aa89052802a7db Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:21 +0000 Subject: genirq: Add send_ipi callbacks to irq_chip Introduce the new callbacks which can be used by the core code to implement a generic IPI send mechanism. Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-11-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 10273dce058a..3b3a5b817469 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -347,6 +347,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_get_irqchip_state: return the internal state of an interrupt * @irq_set_irqchip_state: set the internal state of a interrupt * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine + * @ipi_send_single: send a single IPI to destination cpus + * @ipi_send_mask: send an IPI to destination cpus in cpumask * @flags: chip specific flags */ struct irq_chip { @@ -391,6 +393,9 @@ struct irq_chip { int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); + void (*ipi_send_single)(struct irq_data *data, unsigned int cpu); + void (*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest); + unsigned long flags; }; -- cgit v1.2.3 From 3b8e29a82dd16c1f2061e0b955a71cd36eeb061b Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:22 +0000 Subject: genirq: Implement ipi_send_mask/single() Add APIs to send IPIs from driver and arch code. We have different functions because we allow architecture code to cache the irq descriptor to avoid lookups. Driver code has to use the irq number and is subject to more restrictive checks. [ tglx: Polish the implementation ] Signed-off-by: Qais Yousef Cc: Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-12-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 3b3a5b817469..d5ebd94822d2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -948,5 +948,9 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc, /* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */ #define INVALID_HWIRQ (~0UL) irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); +int __ipi_send_single(struct irq_desc *desc, unsigned int cpu); +int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest); +int ipi_send_single(unsigned int virq, unsigned int cpu); +int ipi_send_mask(unsigned int virq, const struct cpumask *dest); #endif /* _LINUX_IRQ_H */ -- cgit v1.2.3 From bb11cff327e54179c13446c4022ed4ed7d4871c7 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 8 Dec 2015 13:20:28 +0000 Subject: MIPS: Make smp CMP, CPS and MT use the new generic IPI functions This commit does several things to avoid breaking bisectability. 1- Remove IPI init code from irqchip/mips-gic 2- Implement the new irqchip->send_ipi() in irqchip/mips-gic 3- Select GENERIC_IRQ_IPI Kconfig symbol for MIPS_GIC 4- Change MIPS SMP to use the generic IPI implementation Only the SMP variants that use GIC were converted as it's the only irqchip that will have the support for generic IPI for now. Signed-off-by: Qais Yousef Acked-by: Ralf Baechle Cc: Cc: Cc: Cc: Cc: Cc: Qais Yousef Link: http://lkml.kernel.org/r/1449580830-23652-18-git-send-email-qais.yousef@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irqchip/mips-gic.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index ce824db48d64..80f89e4a29ac 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -261,9 +261,6 @@ extern void gic_write_compare(cycle_t cnt); extern void gic_write_cpu_compare(cycle_t cnt, int cpu); extern void gic_start_count(void); extern void gic_stop_count(void); -extern void gic_send_ipi(unsigned int intr); -extern unsigned int plat_ipi_call_int_xlate(unsigned int); -extern unsigned int plat_ipi_resched_int_xlate(unsigned int); extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); -- cgit v1.2.3 From 7aca0c07207385cca76025cc85231519935722b9 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Fri, 26 Feb 2016 19:14:13 -0800 Subject: clocksource: Introduce clocksource_freq2mult() The clocksource_khz2mult() and clocksource_hz2mult() share similar code wihch calculates a mult from the given frequency. Both implementations in differ only in value of a frequency. This patch introduces the clocksource_freq2mult() helper with generic implementation of mult calculation to prevent code duplication. Signed-off-by: Alexander Kuleshov Signed-off-by: John Stultz Cc: Prarit Bhargava Cc: Richard Cochran Link: http://lkml.kernel.org/r/1456542854-22104-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 6013021a3b39..a307bf62974f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -118,6 +118,23 @@ struct clocksource { /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) +static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from) +{ + /* freq = cyc/from + * mult/2^shift = ns/cyc + * mult = ns/cyc * 2^shift + * mult = from/freq * 2^shift + * mult = from * 2^shift / freq + * mult = (from< Date: Fri, 26 Feb 2016 18:43:23 +0000 Subject: cpu/hotplug: Restructure FROZEN state handling There are only a few callbacks which really care about FROZEN vs. !FROZEN. No need to have extra states for this. Publish the frozen state in an extra variable which is updated under the hotplug lock and let the users interested deal with it w/o imposing that extra state checks on everyone. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182340.334912357@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d2ca8c38f9c4..f2fb54938ee6 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -118,6 +118,7 @@ enum { #ifdef CONFIG_SMP +extern bool cpuhp_tasks_frozen; /* Need to know about CPUs going up/down? */ #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) #define cpu_notifier(fn, pri) { \ @@ -177,6 +178,7 @@ extern void cpu_maps_update_done(void); #define cpu_notifier_register_done cpu_maps_update_done #else /* CONFIG_SMP */ +#define cpuhp_tasks_frozen 0 #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) #define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) -- cgit v1.2.3 From cff7d378d3fdbb53db9b6e2578b14855f401cd41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:28 +0000 Subject: cpu/hotplug: Convert to a state machine for the control processor Move the split out steps into a callback array and let the cpu_up/down code iterate through the array functions. For now most of the callbacks are asymmetric to resemble the current hotplug maze. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182340.671816690@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 9 ++++----- include/linux/cpuhotplug.h | 13 +++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 include/linux/cpuhotplug.h (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f2fb54938ee6..78989f20420f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -16,6 +16,7 @@ #include #include #include +#include struct device; struct device_node; @@ -27,6 +28,9 @@ struct cpu { struct device dev; }; +extern void boot_cpu_init(void); +extern void boot_cpu_state_init(void); + extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); extern bool cpu_is_hotpluggable(unsigned cpu); @@ -267,11 +271,6 @@ static inline int disable_nonboot_cpus(void) { return 0; } static inline void enable_nonboot_cpus(void) {} #endif /* !CONFIG_PM_SLEEP_SMP */ -enum cpuhp_state { - CPUHP_OFFLINE, - CPUHP_ONLINE, -}; - void cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h new file mode 100644 index 000000000000..d55c9e64acd7 --- /dev/null +++ b/include/linux/cpuhotplug.h @@ -0,0 +1,13 @@ +#ifndef __CPUHOTPLUG_H +#define __CPUHOTPLUG_H + +enum cpuhp_state { + CPUHP_OFFLINE, + CPUHP_CREATE_THREADS, + CPUHP_NOTIFY_PREPARE, + CPUHP_BRINGUP_CPU, + CPUHP_NOTIFY_ONLINE, + CPUHP_ONLINE, +}; + +#endif -- cgit v1.2.3 From 4baa0afc6719cbf36a1e08551484a641926b3fd1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:29 +0000 Subject: cpu/hotplug: Convert the hotplugged cpu work to a state machine Move the functions which need to run on the hotplugged processor into a state machine array and let the code iterate through these functions. In a later state, this will grow synchronization points between the control processor and the hotplugged processor, so we can move the various architecture implementations of the synchronizations to the core. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182340.770651526@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d55c9e64acd7..d9303cca83d3 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -6,6 +6,10 @@ enum cpuhp_state { CPUHP_CREATE_THREADS, CPUHP_NOTIFY_PREPARE, CPUHP_BRINGUP_CPU, + CPUHP_AP_OFFLINE, + CPUHP_AP_NOTIFY_STARTING, + CPUHP_AP_ONLINE, + CPUHP_TEARDOWN_CPU, CPUHP_NOTIFY_ONLINE, CPUHP_ONLINE, }; -- cgit v1.2.3 From 5b7aa87e0482be768486e0c2277aa4122487eb9d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:33 +0000 Subject: cpu/hotplug: Implement setup/removal interface Implement function which allow to setup/remove hotplug state callbacks. The default behaviour for setup is to call the startup function for this state for (or on) all cpus which have a hotplug state >= the installed state. The default behaviour for removal is to call the teardown function for this state for (or on) all cpus which have a hotplug state >= the installed state. This includes rollback to the previous state in case of failure. A special state is CPUHP_ONLINE_DYN. Its for dynamically registering a hotplug callback pair. This is for drivers which have no dependencies to avoid that we need to allocate CPUHP states for each of them For both setup and remove helper functions are provided, which prevent the core to issue the callbacks. This simplifies the conversion of existing hotplug notifiers. [ Dynamic registering implemented by Sebastian Siewior ] Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.103464877@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d9303cca83d3..29935261b26d 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -11,7 +11,74 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_NOTIFY_ONLINE, + CPUHP_ONLINE_DYN, + CPUHP_ONLINE_DYN_END = CPUHP_ONLINE_DYN + 30, CPUHP_ONLINE, }; +int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu)); + +/** + * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks + * @state: The state for which the calls are installed + * @name: Name of the callback (will be used in debug output) + * @startup: startup callback function + * @teardown: teardown callback function + * + * Installs the callback functions and invokes the startup callback on + * the present cpus which have already reached the @state. + */ +static inline int cpuhp_setup_state(enum cpuhp_state state, + const char *name, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu)) +{ + return __cpuhp_setup_state(state, name, true, startup, teardown); +} + +/** + * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the + * callbacks + * @state: The state for which the calls are installed + * @name: Name of the callback. + * @startup: startup callback function + * @teardown: teardown callback function + * + * Same as @cpuhp_setup_state except that no calls are executed are invoked + * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n. + */ +static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state, + const char *name, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu)) +{ + return __cpuhp_setup_state(state, name, false, startup, teardown); +} + +void __cpuhp_remove_state(enum cpuhp_state state, bool invoke); + +/** + * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown + * @state: The state for which the calls are removed + * + * Removes the callback functions and invokes the teardown callback on + * the present cpus which have already reached the @state. + */ +static inline void cpuhp_remove_state(enum cpuhp_state state) +{ + __cpuhp_remove_state(state, true); +} + +/** + * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking + * teardown + * @state: The state for which the calls are removed + */ +static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state) +{ + __cpuhp_remove_state(state, false); +} + #endif -- cgit v1.2.3 From 949338e35131c551f7bf54f48a2e3a227af6721b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:35 +0000 Subject: cpu/hotplug: Move scheduler cpu_online notifier to hotplug core Move the scheduler cpu online notifier part to the hotplug core. This is anyway the highest priority callback and we need that functionality right now for the next changes. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.200791046@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 29935261b26d..2f2e5d9711c4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -10,6 +10,7 @@ enum cpuhp_state { CPUHP_AP_NOTIFY_STARTING, CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, + CPUHP_CPU_SET_ACTIVE, CPUHP_NOTIFY_ONLINE, CPUHP_ONLINE_DYN, CPUHP_ONLINE_DYN_END = CPUHP_ONLINE_DYN + 30, -- cgit v1.2.3 From 931ef163309ee955611f287dc65248b39a65fc9d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:36 +0000 Subject: cpu/hotplug: Unpark smpboot threads from the state machine Handle the smpboot threads in the state machine. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.295777684@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 7 +------ include/linux/cpuhotplug.h | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 78989f20420f..83f35767016d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -78,7 +78,7 @@ enum { /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, - CPU_PRI_SMPBOOT = 9, + /* bring up workqueues before normal notifiers and down after */ CPU_PRI_WORKQUEUE_UP = 5, CPU_PRI_WORKQUEUE_DOWN = -5, @@ -172,7 +172,6 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb) } #endif -void smpboot_thread_init(void); int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); @@ -221,10 +220,6 @@ static inline void cpu_notifier_register_done(void) { } -static inline void smpboot_thread_init(void) -{ -} - #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2f2e5d9711c4..38679106fddd 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -11,6 +11,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_CPU_SET_ACTIVE, + CPUHP_SMPBOOT_THREADS, CPUHP_NOTIFY_ONLINE, CPUHP_ONLINE_DYN, CPUHP_ONLINE_DYN_END = CPUHP_ONLINE_DYN + 30, -- cgit v1.2.3 From 1cf4f629d9d246519a1e76c021806f2a51ddba4d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:39 +0000 Subject: cpu/hotplug: Move online calls to hotplugged cpu Let the hotplugged cpu invoke the setup/teardown callbacks (CPU_ONLINE/CPU_DOWN_PREPARE) itself. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.536364371@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 38679106fddd..8a715bb1e192 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -11,10 +11,12 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_CPU_SET_ACTIVE, - CPUHP_SMPBOOT_THREADS, - CPUHP_NOTIFY_ONLINE, - CPUHP_ONLINE_DYN, - CPUHP_ONLINE_DYN_END = CPUHP_ONLINE_DYN + 30, + CPUHP_KICK_AP_THREAD, + CPUHP_BP_ONLINE, + CPUHP_AP_SMPBOOT_THREADS, + CPUHP_AP_NOTIFY_ONLINE, + CPUHP_AP_ONLINE_DYN, + CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, CPUHP_ONLINE, }; -- cgit v1.2.3 From fc6d73d67436e7784758a831227bd019547a3f73 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:40 +0000 Subject: arch/hotplug: Call into idle with a proper state Let the non boot cpus call into idle with the corresponding hotplug state, so the hotplug core can handle the further bringup. That's a first step to convert the boot side of the hotplugged cpus to do all the synchronization with the other side through the state machine. For now it'll only start the hotplug thread and kick the full bringup of the cpu. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.614102639@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8a715bb1e192..4aa263adc536 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -13,6 +13,7 @@ enum cpuhp_state { CPUHP_CPU_SET_ACTIVE, CPUHP_KICK_AP_THREAD, CPUHP_BP_ONLINE, + CPUHP_AP_ONLINE_IDLE, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_NOTIFY_ONLINE, CPUHP_AP_ONLINE_DYN, -- cgit v1.2.3 From 8df3e07e7f21f2ed8d001e6fabf9505946b438aa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:41 +0000 Subject: cpu/hotplug: Let upcoming cpu bring itself fully up Let the upcoming cpu kick the hotplug thread and let itself complete the bringup. That way the controll side can just wait for the completion or later when we made the hotplug machinery async not care at all. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.697655464@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4aa263adc536..ad5d7fcb0130 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -10,9 +10,6 @@ enum cpuhp_state { CPUHP_AP_NOTIFY_STARTING, CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, - CPUHP_CPU_SET_ACTIVE, - CPUHP_KICK_AP_THREAD, - CPUHP_BP_ONLINE, CPUHP_AP_ONLINE_IDLE, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_NOTIFY_ONLINE, @@ -86,4 +83,10 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state) __cpuhp_remove_state(state, false); } +#ifdef CONFIG_SMP +void cpuhp_online_idle(enum cpuhp_state state); +#else +static inline void cpuhp_online_idle(enum cpuhp_state state) { } +#endif + #endif -- cgit v1.2.3 From e69aab13117efc1987620090e539b4ebeb33a04c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:43 +0000 Subject: cpu/hotplug: Make wait for dead cpu completion based Kill the busy spinning on the control side and just wait for the hotplugged cpu to tell that it reached the dead state. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.776157858@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 5 +++-- include/linux/cpuhotplug.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 83f35767016d..91a48d1b4ca0 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -276,14 +276,15 @@ void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); void arch_cpu_idle_dead(void); -DECLARE_PER_CPU(bool, cpu_dead_idle); - int cpu_report_state(int cpu); int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); #ifdef CONFIG_HOTPLUG_CPU bool cpu_wait_death(unsigned int cpu, int seconds); bool cpu_report_death(void); +void cpuhp_report_idle_dead(void); +#else +static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index ad5d7fcb0130..5d68e15e46b7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -6,6 +6,7 @@ enum cpuhp_state { CPUHP_CREATE_THREADS, CPUHP_NOTIFY_PREPARE, CPUHP_BRINGUP_CPU, + CPUHP_AP_IDLE_DEAD, CPUHP_AP_OFFLINE, CPUHP_AP_NOTIFY_STARTING, CPUHP_AP_ONLINE, -- cgit v1.2.3 From 27d50c7eeb0f03c3d3ca72aac4d2dd487ca1f3f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:44 +0000 Subject: rcu: Make CPU_DYING_IDLE an explicit call Make the RCU CPU_DYING_IDLE callback an explicit function call, so it gets invoked at the proper place. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.870167933@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 4 +--- include/linux/notifier.h | 2 ++ include/linux/rcupdate.h | 4 +--- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 91a48d1b4ca0..f9b1fab4388a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -101,9 +101,7 @@ enum { * Called on the new cpu, just before * enabling interrupts. Must not sleep, * must not fail */ -#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached - * idle loop. */ -#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, +#define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly, * perhaps due to preemption. */ /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend diff --git a/include/linux/notifier.h b/include/linux/notifier.h index d14a4c362465..4149868de4e6 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -47,6 +47,8 @@ * runtime initialization. */ +struct notifier_block; + typedef int (*notifier_fn_t)(struct notifier_block *nb, unsigned long action, void *data); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 14e6f47ee16f..fc46fe3ea259 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -332,9 +332,7 @@ void rcu_init(void); void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); -struct notifier_block; -int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); +void rcu_report_dead(unsigned int cpu); #ifndef CONFIG_TINY_RCU void rcu_end_inkernel_boot(void); -- cgit v1.2.3 From 9da0f49c8767cc0ef6101cb21156cf4380ed50dd Mon Sep 17 00:00:00 2001 From: "Christopher S. Hall" Date: Mon, 22 Feb 2016 03:15:20 -0800 Subject: time: Add timekeeping snapshot code capturing system time and counter In the current timekeeping code there isn't any interface to atomically capture the current relationship between the system counter and system time. ktime_get_snapshot() returns this triple (counter, monotonic raw, realtime) in the system_time_snapshot struct. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andy Lutomirski Cc: kevin.b.stanton@intel.com Cc: kevin.j.clarke@intel.com Cc: hpa@zytor.com Cc: jeffrey.t.kirsher@intel.com Cc: netdev@vger.kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Christopher S. Hall [jstultz: Moved structure definitions around to clean things up, fixed cycles_t/cycle_t confusion.] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ec89d846324c..7817591af46f 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -266,6 +266,24 @@ extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real); +/* + * struct system_time_snapshot - simultaneous raw/real time capture with + * counter value + * @cycles: Clocksource counter value to produce the system times + * @real: Realtime system time + * @raw: Monotonic raw system time + */ +struct system_time_snapshot { + cycle_t cycles; + ktime_t real; + ktime_t raw; +}; + +/* + * Simultaneously snapshot realtime and monotonic raw clocks + */ +extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); + /* * Persistent clock related interfaces */ -- cgit v1.2.3 From ba26621e63ce6dc481d90ab9f6902e058d4ea39a Mon Sep 17 00:00:00 2001 From: "Christopher S. Hall" Date: Mon, 22 Feb 2016 03:15:21 -0800 Subject: time: Remove duplicated code in ktime_get_raw_and_real() The code in ktime_get_snapshot() is a superset of the code in ktime_get_raw_and_real() code. Further, ktime_get_raw_and_real() is called only by the PPS code, pps_get_ts(). Consolidate the pps_get_ts() code into a single function calling ktime_get_snapshot() and eliminate ktime_get_raw_and_real(). A side effect of this is that the raw and real results of pps_get_ts() correspond to exactly the same clock cycle. Previously these values represented separate reads of the system clock. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andy Lutomirski Cc: kevin.b.stanton@intel.com Cc: kevin.j.clarke@intel.com Cc: hpa@zytor.com Cc: jeffrey.t.kirsher@intel.com Cc: netdev@vger.kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Christopher S. Hall Signed-off-by: John Stultz --- include/linux/pps_kernel.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 54bf1484d41f..35ac903956c7 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -111,22 +111,17 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, kt->nsec = ts.tv_nsec; } -#ifdef CONFIG_NTP_PPS - static inline void pps_get_ts(struct pps_event_time *ts) { - ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real); -} + struct system_time_snapshot snap; -#else /* CONFIG_NTP_PPS */ - -static inline void pps_get_ts(struct pps_event_time *ts) -{ - ktime_get_real_ts64(&ts->ts_real); + ktime_get_snapshot(&snap); + ts->ts_real = ktime_to_timespec64(snap.real); +#ifdef CONFIG_NTP_PPS + ts->ts_raw = ktime_to_timespec64(snap.raw); +#endif } -#endif /* CONFIG_NTP_PPS */ - /* Subtract known time delay from PPS event time(s) */ static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta) { -- cgit v1.2.3 From 8006c24595cab106bcb9da12d35e32e14ff492df Mon Sep 17 00:00:00 2001 From: "Christopher S. Hall" Date: Mon, 22 Feb 2016 03:15:22 -0800 Subject: time: Add driver cross timestamp interface for higher precision time synchronization ACKNOWLEDGMENT: cross timestamp code was developed by Thomas Gleixner . It has changed considerably and any mistakes are mine. The precision with which events on multiple networked systems can be synchronized using, as an example, PTP (IEEE 1588, 802.1AS) is limited by the precision of the cross timestamps between the system clock and the device (timestamp) clock. Precision here is the degree of simultaneity when capturing the cross timestamp. Currently the PTP cross timestamp is captured in software using the PTP device driver ioctl PTP_SYS_OFFSET. Reads of the device clock are interleaved with reads of the realtime clock. At best, the precision of this cross timestamp is on the order of several microseconds due to software latencies. Sub-microsecond precision is required for industrial control and some media applications. To achieve this level of precision hardware supported cross timestamping is needed. The function get_device_system_crosstimestamp() allows device drivers to return a cross timestamp with system time properly scaled to nanoseconds. The realtime value is needed to discipline that clock using PTP and the monotonic raw value is used for applications that don't require a "real" time, but need an unadjusted clock time. The get_device_system_crosstimestamp() code calls back into the driver to ensure that the system counter is within the current timekeeping update interval. Modern Intel hardware provides an Always Running Timer (ART) which is exactly related to TSC through a known frequency ratio. The ART is routed to devices on the system and is used to precisely and simultaneously capture the device clock with the ART. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andy Lutomirski Cc: kevin.b.stanton@intel.com Cc: kevin.j.clarke@intel.com Cc: hpa@zytor.com Cc: jeffrey.t.kirsher@intel.com Cc: netdev@vger.kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Christopher S. Hall [jstultz: Reworked to remove extra structures and simplify calling] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7817591af46f..4a2ca65fc778 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -279,6 +279,41 @@ struct system_time_snapshot { ktime_t raw; }; +/* + * struct system_device_crosststamp - system/device cross-timestamp + * (syncronized capture) + * @device: Device time + * @sys_realtime: Realtime simultaneous with device time + * @sys_monoraw: Monotonic raw simultaneous with device time + */ +struct system_device_crosststamp { + ktime_t device; + ktime_t sys_realtime; + ktime_t sys_monoraw; +}; + +/* + * struct system_counterval_t - system counter value with the pointer to the + * corresponding clocksource + * @cycles: System counter value + * @cs: Clocksource corresponding to system counter value. Used by + * timekeeping code to verify comparibility of two cycle values + */ +struct system_counterval_t { + cycle_t cycles; + struct clocksource *cs; +}; + +/* + * Get cross timestamp between system clock and device clock + */ +extern int get_device_system_crosststamp( + int (*get_time_fn)(ktime_t *device_time, + struct system_counterval_t *system_counterval, + void *ctx), + void *ctx, + struct system_device_crosststamp *xtstamp); + /* * Simultaneously snapshot realtime and monotonic raw clocks */ -- cgit v1.2.3 From 2c756feb18d9ec258dbb3a3d11c47e28820690d7 Mon Sep 17 00:00:00 2001 From: "Christopher S. Hall" Date: Mon, 22 Feb 2016 03:15:23 -0800 Subject: time: Add history to cross timestamp interface supporting slower devices Another representative use case of time sync and the correlated clocksource (in addition to PTP noted above) is PTP synchronized audio. In a streaming application, as an example, samples will be sent and/or received by multiple devices with a presentation time that is in terms of the PTP master clock. Synchronizing the audio output on these devices requires correlating the audio clock with the PTP master clock. The more precise this correlation is, the better the audio quality (i.e. out of sync audio sounds bad). From an application standpoint, to correlate the PTP master clock with the audio device clock, the system clock is used as a intermediate timebase. The transforms such an application would perform are: System Clock <-> Audio clock System Clock <-> Network Device Clock [<-> PTP Master Clock] Modern Intel platforms can perform a more accurate cross timestamp in hardware (ART,audio device clock). The audio driver requires ART->system time transforms -- the same as required for the network driver. These platforms offload audio processing (including cross-timestamps) to a DSP which to ensure uninterrupted audio processing, communicates and response to the host only once every millsecond. As a result is takes up to a millisecond for the DSP to receive a request, the request is processed by the DSP, the audio output hardware is polled for completion, the result is copied into shared memory, and the host is notified. All of these operation occur on a millisecond cadence. This transaction requires about 2 ms, but under heavier workloads it may take up to 4 ms. Adding a history allows these slow devices the option of providing an ART value outside of the current interval. In this case, the callback provided is an accessor function for the previously obtained counter value. If get_system_device_crosststamp() receives a counter value previous to cycle_last, it consults the history provided as an argument in history_ref and interpolates the realtime and monotonic raw system time using the provided counter value. If there are any clock discontinuities, e.g. from calling settimeofday(), the monotonic raw time is interpolated in the usual way, but the realtime clock time is adjusted by scaling the monotonic raw adjustment. When an accessor function is used a history argument *must* be provided. The history is initialized using ktime_get_snapshot() and must be called before the counter values are read. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andy Lutomirski Cc: kevin.b.stanton@intel.com Cc: kevin.j.clarke@intel.com Cc: hpa@zytor.com Cc: jeffrey.t.kirsher@intel.com Cc: netdev@vger.kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Christopher S. Hall [jstultz: Fixed up cycles_t/cycle_t type confusion] Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 2 ++ include/linux/timekeeping.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 25247220b4b7..e88005459035 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -50,6 +50,7 @@ struct tk_read_base { * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events + * @cs_was_changed_seq: The sequence number of clocksource change events * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval @@ -91,6 +92,7 @@ struct timekeeper { ktime_t offs_tai; s32 tai_offset; unsigned int clock_was_set_seq; + u8 cs_was_changed_seq; ktime_t next_leap_ktime; struct timespec64 raw_time; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 4a2ca65fc778..96f37bee3bc1 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -272,11 +272,15 @@ extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time + * @clock_was_set_seq: The sequence number of clock was set events + * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { cycle_t cycles; ktime_t real; ktime_t raw; + unsigned int clock_was_set_seq; + u8 cs_was_changed_seq; }; /* @@ -312,6 +316,7 @@ extern int get_device_system_crosststamp( struct system_counterval_t *system_counterval, void *ctx), void *ctx, + struct system_time_snapshot *history, struct system_device_crosststamp *xtstamp); /* -- cgit v1.2.3 From 82e88ff1ea948d83125a8aaa7c9809f03ccc500f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Mar 2016 11:11:12 +0100 Subject: hrtimer: Revert CLOCK_MONOTONIC_RAW support Revert commits: a6e707ddbdf1: KVM: arm/arm64: timer: Switch to CLOCK_MONOTONIC_RAW 9006a01829a5: hrtimer: Catch illegal clockids 9c808765e88e: hrtimer: Add support for CLOCK_MONOTONIC_RAW Marc found out, that there are fundamental issues with that patch series because __hrtimer_get_next_event() and hrtimer_forward() need support for CLOCK_MONOTONIC_RAW. Nothing which is easily fixed, so revert the whole lot. Reported-by: Marc Zyngier Link: http://lkml.kernel.org/r/56D6CEF0.8060607@arm.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index a6d64af5e73f..76dd4f0da5ca 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -151,7 +151,6 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, - HRTIMER_BASE_MONOTONIC_RAW, HRTIMER_MAX_CLOCK_BASES, }; -- cgit v1.2.3 From 719f1aa4a67199a3c4c68a03f94e5ec44d9d5f82 Mon Sep 17 00:00:00 2001 From: "Christopher S. Hall" Date: Mon, 22 Feb 2016 03:15:25 -0800 Subject: ptp: Add PTP_SYS_OFFSET_PRECISE for driver crosstimestamping Currently, network /system cross-timestamping is performed in the PTP_SYS_OFFSET ioctl. The PTP clock driver reads gettimeofday() and the gettime64() callback provided by the driver. The cross-timestamp is best effort where the latency between the capture of system time (getnstimeofday()) and the device time (driver callback) may be significant. The getcrosststamp() callback and corresponding PTP_SYS_OFFSET_PRECISE ioctl allows the driver to perform this device/system correlation when for example cross timestamp hardware is available. Modern Intel systems can do this for onboard Ethernet controllers using the ART counter. There is virtually zero latency between captures of the ART and network device clock. The capabilities ioctl (PTP_CLOCK_GETCAPS), is augmented allowing applications to query whether or not drivers implement the getcrosststamp callback, providing more precise cross timestamping. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andy Lutomirski Cc: kevin.b.stanton@intel.com Cc: kevin.j.clarke@intel.com Cc: hpa@zytor.com Cc: jeffrey.t.kirsher@intel.com Cc: netdev@vger.kernel.org Acked-by: Richard Cochran Signed-off-by: Christopher S. Hall [jstultz: Commit subject tweaks] Signed-off-by: John Stultz --- include/linux/ptp_clock_kernel.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index b8b73066d137..6b15e168148a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -38,6 +38,7 @@ struct ptp_clock_request { }; }; +struct system_device_crosststamp; /** * struct ptp_clock_info - decribes a PTP hardware clock * @@ -67,6 +68,11 @@ struct ptp_clock_request { * @gettime64: Reads the current time from the hardware clock. * parameter ts: Holds the result. * + * @getcrosststamp: Reads the current time from the hardware clock and + * system clock simultaneously. + * parameter cts: Contains timestamp (device,system) pair, + * where system time is realtime and monotonic. + * * @settime64: Set the current time on the hardware clock. * parameter ts: Time value to set. * @@ -105,6 +111,8 @@ struct ptp_clock_info { int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*getcrosststamp)(struct ptp_clock_info *ptp, + struct system_device_crosststamp *cts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); -- cgit v1.2.3